diff --git a/CMakeLists.txt b/CMakeLists.txt index 245ad0620cc4a..1274245c581fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -343,6 +343,18 @@ if(WITH_BLKIN) include_directories(SYSTEM src/blkin/blkin-lib) endif(WITH_BLKIN) +option(WITH_JAEGER "Use jaeger to create tracepoints and display it on Jaeger UI" ON) + +if(WITH_JAEGER) + #TODO: handle the case if libraries found locally, why be redundant? + #set(Jaeger_DIR "${CMAKE_CURRENT_LIST_DIR}/Jaegertracing/Jaeger-client-cpp") + find_package(OpenTracing REQUIRED) + find_package(Jaeger REQUIRED) + find_package(yaml-cpp REQUIRED) + set(Complete_Jaeger_LIBRARIES ${Jaeger_LIBRARIES} ${OpenTracing_LIBRARIES} ${yaml-cpp_LIBRARIES}) + include_directories(SYSTEM ${Jaeger_INCLUDE_DIRS} ${yaml-cpp_INCLUDE_DIRS} ${OpenTracing_INCLUDE_DIRS}) +endif(WITH_JAEGER) + option(WITH_BOOST_CONTEXT "Enable boost::asio stackful coroutines" ON) if(WITH_BOOST_CONTEXT) set(HAVE_BOOST_CONTEXT ON) diff --git a/cmake/modules/FindJaeger.cmake b/cmake/modules/FindJaeger.cmake new file mode 100644 index 0000000000000..6c8b187eda6c5 --- /dev/null +++ b/cmake/modules/FindJaeger.cmake @@ -0,0 +1,78 @@ +#.rst: +# FindJaeger +# ------------ +# +# This module finds the `Jaeger` library. 
+# +# Imported target +# ^^^^^^^^^^^^^^^ +# +# This module defines the following :prop_tgt:`IMPORTED` target: +# +# ``Jaeger`` +# The Jaeger library, if found +# +# Result variables +# ^^^^^^^^^^^^^^^^ +# +# This module sets the following variables: +# +# ``Jaeger_FOUND`` +# ``TRUE`` if the system has Jaeger +# ``Jaeger_INCLUDE_DIRS`` +# The Jaeger include directories +# ``Jaeger_LIBRARIES`` +# The libraries needed to use Jaeger +# ``Jaeger_VERSION_STRING`` +# The Jaeger version (hard-coded to ``0.5.0`` by this module, not detected) +# ``Jaeger_HAS_TRACEF`` +# Always ``0`` when Jaeger is found: ``tracef()`` is an LTTng-UST API, not a Jaeger one +# ``Jaeger_HAS_TRACELOG`` +# Always ``0`` when Jaeger is found: ``tracelog()`` is an LTTng-UST API, not a Jaeger one + +#============================================================================= +# Copyright 2018 Mania Abdi, Inc. +# Copyright 2018 Mania Abdi +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) 
+ +find_path(Jaeger_INCLUDE_DIRS NAMES jaegertracing/Tracer.h) +find_library(Jaeger_LIBRARIES NAMES jaegertracing) + +if(Jaeger_INCLUDE_DIRS AND Jaeger_LIBRARIES) + # tracef()/tracelog() are LTTng-UST APIs that Jaeger does not provide + set(Jaeger_HAS_TRACEF 0) + set(Jaeger_HAS_TRACELOG 0) + # the version is hard-coded; it is not read from the installed library + set(Jaeger_VERSION_STRING "0.5.0") + + if(NOT TARGET Jaeger) + add_library(Jaeger UNKNOWN IMPORTED) + set_target_properties(Jaeger PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Jaeger_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${CMAKE_DL_LIBS}" + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${Jaeger_LIBRARIES}") + endif() + + # add libdl to required libraries + set(Jaeger_LIBRARIES ${Jaeger_LIBRARIES} ${CMAKE_DL_LIBS}) +endif() + +# handle the QUIETLY and REQUIRED arguments and set Jaeger_FOUND to +# TRUE if all listed variables are TRUE +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Jaeger FOUND_VAR Jaeger_FOUND + REQUIRED_VARS Jaeger_LIBRARIES + Jaeger_INCLUDE_DIRS + VERSION_VAR Jaeger_VERSION_STRING) +mark_as_advanced(Jaeger_LIBRARIES Jaeger_INCLUDE_DIRS) + diff --git a/cmake/modules/FindOpenTracing.cmake b/cmake/modules/FindOpenTracing.cmake new file mode 100644 index 0000000000000..4b56bc1fb33e7 --- /dev/null +++ b/cmake/modules/FindOpenTracing.cmake @@ -0,0 +1,78 @@ +#.rst: +# FindOpenTracing +# ------------ +# +# This module finds the `OpenTracing` library. 
+# +# Imported target +# ^^^^^^^^^^^^^^^ +# +# This module defines the following :prop_tgt:`IMPORTED` target: +# +# ``OpenTracing`` +# The OpenTracing library, if found +# +# Result variables +# ^^^^^^^^^^^^^^^^ +# +# This module sets the following variables: +# +# ``OpenTracing_FOUND`` +# ``TRUE`` if the system has OpenTracing +# ``OpenTracing_INCLUDE_DIRS`` +# The OpenTracing include directories +# ``OpenTracing_LIBRARIES`` +# The libraries needed to use OpenTracing +# ``OpenTracing_VERSION_STRING`` +# The OpenTracing version (hard-coded to ``1.5.x`` by this module, not detected) +# ``OpenTracing_HAS_TRACEF`` +# Always ``0`` when OpenTracing is found: ``tracef()`` is an LTTng-UST API, not an OpenTracing one +# ``OpenTracing_HAS_TRACELOG`` +# Always ``0`` when OpenTracing is found: ``tracelog()`` is an LTTng-UST API, not an OpenTracing one + +#============================================================================= +# Copyright 2018 Mania Abdi, Inc. +# Copyright 2018 Mania Abdi +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) 
+ +find_path(OpenTracing_INCLUDE_DIRS NAMES opentracing/tracer.h) +find_library(OpenTracing_LIBRARIES NAMES opentracing) + +if(OpenTracing_INCLUDE_DIRS AND OpenTracing_LIBRARIES) + # tracef()/tracelog() are LTTng-UST APIs that OpenTracing does not provide + set(OpenTracing_HAS_TRACEF 0) + set(OpenTracing_HAS_TRACELOG 0) + + # will need specifically 1.5.x for successful working with Jaeger + set(OpenTracing_VERSION_STRING "1.5.x") + + if(NOT TARGET OpenTracing) + add_library(OpenTracing UNKNOWN IMPORTED) + set_target_properties(OpenTracing PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OpenTracing_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${CMAKE_DL_LIBS}" + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${OpenTracing_LIBRARIES}") + endif() + + # add libdl to required libraries + set(OpenTracing_LIBRARIES ${OpenTracing_LIBRARIES} ${CMAKE_DL_LIBS}) +endif() + +# handle the QUIETLY and REQUIRED arguments and set OpenTracing_FOUND to +# TRUE if all listed variables are TRUE +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(OpenTracing FOUND_VAR OpenTracing_FOUND + REQUIRED_VARS OpenTracing_LIBRARIES + OpenTracing_INCLUDE_DIRS + VERSION_VAR OpenTracing_VERSION_STRING) +mark_as_advanced(OpenTracing_LIBRARIES OpenTracing_INCLUDE_DIRS) diff --git a/cmake/modules/Findyaml-cpp.cmake b/cmake/modules/Findyaml-cpp.cmake new file mode 100644 index 0000000000000..5eec36246e2a4 --- /dev/null +++ b/cmake/modules/Findyaml-cpp.cmake @@ -0,0 +1,64 @@ +# +# This file is open source software, licensed to you under the terms +# of the Apache License, Version 2.0 (the "License"). See the NOTICE file +# distributed with this work for additional information regarding copyright +# ownership. You may not use this file except in compliance with the License. 
+# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Copyright (C) 2018 Scylladb, Ltd. +# + +find_package (PkgConfig REQUIRED) + +pkg_search_module (yaml-cpp_PC + QUIET + yaml-cpp) + +find_library (yaml-cpp_LIBRARY + NAMES yaml-cpp + HINTS + ${yaml-cpp_PC_LIBDIR} + ${yaml-cpp_PC_LIBRARY_DIRS}) + +find_path (yaml-cpp_INCLUDE_DIR + NAMES yaml-cpp/yaml.h + PATH_SUFFIXES yaml-cpp + HINTS + ${yaml-cpp_PC_INCLUDEDIR} + ${yaml-cpp_PC_INCLUDE_DIRS}) + +mark_as_advanced ( + yaml-cpp_LIBRARY + yaml-cpp_INCLUDE_DIR) + +include (FindPackageHandleStandardArgs) + +find_package_handle_standard_args (yaml-cpp + REQUIRED_VARS + yaml-cpp_LIBRARY + yaml-cpp_INCLUDE_DIR + VERSION_VAR yaml-cpp_PC_VERSION) + +set (yaml-cpp_LIBRARIES ${yaml-cpp_LIBRARY}) +set (yaml-cpp_INCLUDE_DIRS ${yaml-cpp_INCLUDE_DIR}) + +if (yaml-cpp_FOUND AND NOT (TARGET yaml-cpp::yaml-cpp)) + add_library (yaml-cpp::yaml-cpp UNKNOWN IMPORTED) + + set_target_properties (yaml-cpp::yaml-cpp + PROPERTIES + IMPORTED_LOCATION ${yaml-cpp_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${yaml-cpp_INCLUDE_DIRS}) +endif () diff --git a/debian/control b/debian/control index 671d7cb4e7107..a266b569428be 100644 --- a/debian/control +++ b/debian/control @@ -21,7 +21,7 @@ Build-Depends: cmake (>= 3.5), gperf, g++ (>= 7), javahelper, -# Make-Check jq, + jq, junit4, libaio-dev, libbabeltrace-ctf-dev, @@ -60,10 +60,10 @@ Build-Depends: cmake (>= 3.5), # Crimson libyaml-cpp-dev, librabbitmq-dev, librdkafka-dev, -# Make-Check libxmlsec1, -# Make-Check libxmlsec1-nss, -# Make-Check libxmlsec1-openssl, -# Make-Check libxmlsec1-dev, + libxmlsec1, + libxmlsec1-nss, + 
libxmlsec1-openssl, + libxmlsec1-dev, lsb-release, parted, patch, @@ -71,32 +71,32 @@ Build-Depends: cmake (>= 3.5), # Crimson protobuf-compiler, python3-all-dev, python3-cherrypy3, -# Make-Check python3-jwt, -# Make-Check python3-nose, -# Make-Check python3-pecan, -# Make-Check python3-bcrypt, -# Make-Check python3-six, -# Make-Check tox, -# Make-Check python3-coverage, -# Make-Check python3-dateutil, -# Make-Check python3-openssl, -# Make-Check python3-prettytable, -# Make-Check python3-requests, -# Make-Check python3-scipy, + python3-jwt, + python3-nose, + python3-pecan, + python3-bcrypt, + python3-six, + tox, + python3-coverage, + python3-dateutil, + python3-openssl, + python3-prettytable, + python3-requests, + python3-scipy, python3-setuptools, python3-sphinx, -# Make-Check python3-werkzeug, + python3-werkzeug, python3-setuptools, # Crimson ragel, -# Make-Check socat, + socat, # Crimson systemtap-sdt-dev, -# Make-Check uuid-dev, + uuid-dev, uuid-runtime, valgrind, virtualenv, xfslibs-dev, -# Make-Check xfsprogs, -# Make-Check xmlstarlet, + xfsprogs, + xmlstarlet, yasm [amd64], zlib1g-dev, Standards-Version: 3.9.3 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f370b8dbbbe5..b09d8bdd6791f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -379,6 +379,7 @@ set(ceph_common_deps ${BLKID_LIBRARIES} ${Backtrace_LIBRARIES} ${BLKIN_LIBRARIES} + ${Complete_Jaeger_LIBRARIES} ${CRYPTO_LIBS} ${GSSAPI_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 5e7fb6a78ebe8..6ba51c1d4a9d0 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -177,7 +177,8 @@ if(HAVE_ARMV8_CRC) COMPILE_FLAGS "${CMAKE_C_FLAGS} ${ARMV8_CRC_COMPILE_FLAGS}") endif() target_link_libraries(crc32 - arch) + arch + ${Complete_Jaeger_LIBRARIES}) add_library(common_utf8 STATIC utf8.c) diff --git a/src/common/common_init.cc b/src/common/common_init.cc index 2ad93a958c488..5882206cf6754 100644 --- 
a/src/common/common_init.cc +++ b/src/common/common_init.cc @@ -26,6 +26,52 @@ #define dout_subsys ceph_subsys_ #ifndef WITH_SEASTAR +#ifdef WITH_JAEGER + CephContext *common_preinit(const CephInitParameters &iparams, + enum code_environment_t code_env, int flags,Jager_Tracer& tracer,const Span& parent_span) + { + Span span=tracer.child_span("common_init.cc common_preinit()",parent_span); + // set code environment + ANNOTATE_BENIGN_RACE_SIZED(&g_code_env, sizeof(g_code_env), "g_code_env"); + g_code_env = code_env; + + // Create a configuration object + CephContext *cct = new CephContext(iparams.module_type, code_env, flags); + + auto& conf = cct->_conf; + // add config observers here + + // Set up our entity name. + conf->name = iparams.name; + + // different default keyring locations for osd and mds. this is + // for backward compatibility. moving forward, we want all keyrings + // in these locations. the mon already forces $mon_data/keyring. + if (conf->name.is_mds()) { + conf.set_val_default("keyring", "$mds_data/keyring"); + } else if (conf->name.is_osd()) { + conf.set_val_default("keyring", "$osd_data/keyring"); + } + + if ((flags & CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS)) { + // make this unique despite multiple instances by the same name. + conf.set_val_default("admin_socket", + "$run_dir/$cluster-$name.$pid.$cctid.asok"); + } + + if (code_env == CODE_ENVIRONMENT_LIBRARY || + code_env == CODE_ENVIRONMENT_UTILITY_NODOUT) { + conf.set_val_default("log_to_stderr", "false"); + conf.set_val_default("err_to_stderr", "false"); + conf.set_val_default("log_flush_on_exit", "false"); + } + + conf.set_val("no_config_file", iparams.no_config_file ? 
"true" : "false"); + + return cct; + } +#endif + CephContext *common_preinit(const CephInitParameters &iparams, enum code_environment_t code_env, int flags) { diff --git a/src/common/common_init.h b/src/common/common_init.h index b9e1417396a55..da4f54928337d 100644 --- a/src/common/common_init.h +++ b/src/common/common_init.h @@ -19,6 +19,7 @@ #include "include/common_fwd.h" #include "common/code_environment.h" +#include "include/tracer.h" enum common_init_flags_t { // Set up defaults that make sense for an unprivileged daemon @@ -65,6 +66,8 @@ class CephInitParameters; */ CephContext *common_preinit(const CephInitParameters &iparams, enum code_environment_t code_env, int flags); +CephContext *common_preinit(const CephInitParameters &iparams, + enum code_environment_t code_env, int flags,Jager_Tracer&,const Span&); #endif // #ifndef WITH_SEASTAR /* Print out some parse error. */ diff --git a/src/common/jaegerTracer.h b/src/common/jaegerTracer.h new file mode 100644 index 0000000000000..44ddf3d2f7b79 --- /dev/null +++ b/src/common/jaegerTracer.h @@ -0,0 +1,26 @@ + + +#ifndef JAEGER_TRACER_ +#define JAEGER_TRACER_ + +#include +#include +#include + +class jTracer +{ +private: + static jaegertracing::Config configuration; +public: + jTracer() {} + static void loadYamlConfigFile(const char *); + void initTracer(const char *); + std::unique_ptr newSpan(const char *); + std::unique_ptr childSpan(const char *, const std::unique_ptr &); + std::unique_ptr followUpSpan(const char *, const std::unique_ptr &); + ~jTracer(){ + opentracing::Tracer::Global()->Close(); + } +}; + +#endif \ No newline at end of file diff --git a/src/common/jaegerTracing.cc b/src/common/jaegerTracing.cc new file mode 100644 index 0000000000000..eaab6cbdee8d3 --- /dev/null +++ b/src/common/jaegerTracing.cc @@ -0,0 +1,34 @@ + +#include +#include +#include "jaegerTracer.h" + +void jTracer::loadYamlConfigFile(const char *filePath) +{ + auto yaml = YAML::LoadFile(filePath); + jTracer::configuration = 
jaegertracing::Config::parse(yaml); +} + +void jTracer::initTracer(const char *tracerName) +{ + auto tracer = jaegertracing::Tracer::make( + tracerName, + jTracer::configuration, + jaegertracing::logging::consoleLogger()); + opentracing::Tracer::InitGlobal( + std::static_pointer_cast(tracer)); +} +std::unique_ptr newSpan(const char * spanName){ + auto Span=opentracing::Tracer::Global()->StartSpan(spanName); + return std::move(Span); +} +std::unique_ptr jTracer::childSpan(const char *spanName, const std::unique_ptr &parentSpan) +{ + auto Span = opentracing::Tracer::Global()->StartSpan(spanName, {opentracing::ChildOf(&parentSpan->context())}); + return std::move(Span); +} +std::unique_ptr jTracer::followUpSpan(const char *spanName, const std::unique_ptr &parentSpan) +{ + auto Span = opentracing::Tracer::Global()->StartSpan(spanName, {opentracing::FollowsFrom(&parentSpan->context())}); + return std::move(Span); +} diff --git a/src/global/global_init.cc b/src/global/global_init.cc index 0afeddb12a5f9..321ca941f001d 100644 --- a/src/global/global_init.cc +++ b/src/global/global_init.cc @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef HAVE_SYS_PRCTL_H #include @@ -84,8 +85,9 @@ void global_pre_init( const std::map *defaults, std::vector < const char* >& args, uint32_t module_type, code_environment_t code_env, - int flags) + int flags,Jager_Tracer& tracer,const Span& parent_span) { + Span span=tracer.child_span("global_init.cc global_pre_init",parent_span); std::string conf_file_list; std::string cluster = ""; @@ -96,7 +98,7 @@ void global_pre_init( args, module_type, &cluster, &conf_file_list); - CephContext *cct = common_preinit(iparams, code_env, flags); + CephContext *cct = common_preinit(iparams, code_env, flags,tracer,span); cct->_conf->cluster = cluster; global_init_set_globals(cct); auto& conf = cct->_conf; @@ -165,13 +167,103 @@ void global_pre_init( g_conf().complain_about_parse_error(g_ceph_context); } +#ifdef WITH_JAEGER + void global_pre_init( + 
const std::map *defaults, + std::vector < const char* >& args, + uint32_t module_type, code_environment_t code_env, + int flags) + { + std::string conf_file_list; + std::string cluster = ""; + + // ensure environment arguments are included in early processing + env_to_vec(args); + + CephInitParameters iparams = ceph_argparse_early_args( + args, module_type, + &cluster, &conf_file_list); + + CephContext *cct = common_preinit(iparams, code_env, flags); + cct->_conf->cluster = cluster; + global_init_set_globals(cct); + auto& conf = cct->_conf; + + if (flags & (CINIT_FLAG_NO_DEFAULT_CONFIG_FILE| + CINIT_FLAG_NO_MON_CONFIG)) { + conf->no_mon_config = true; + } + + // alternate defaults + if (defaults) { + for (auto& i : *defaults) { + conf.set_val_default(i.first, i.second); + } + } + + if (conf.get_val("no_config_file")) { + flags |= CINIT_FLAG_NO_DEFAULT_CONFIG_FILE; + } + + int ret = conf.parse_config_files(c_str_or_null(conf_file_list), + &cerr, flags); + if (ret == -EDOM) { + cct->_log->flush(); + cerr << "global_init: error parsing config file." << std::endl; + _exit(1); + } + else if (ret == -ENOENT) { + if (!(flags & CINIT_FLAG_NO_DEFAULT_CONFIG_FILE)) { + if (conf_file_list.length()) { + cct->_log->flush(); + cerr << "global_init: unable to open config file from search list " + << conf_file_list << std::endl; + _exit(1); + } else { + cerr << "did not load config file, using default settings." + << std::endl; + } + } + } + else if (ret) { + cct->_log->flush(); + cerr << "global_init: error reading config file." 
<< std::endl; + _exit(1); + } + + // environment variables override (CEPH_ARGS, CEPH_KEYRING) + conf.parse_env(cct->get_module_type()); + + // command line (as passed by caller) + conf.parse_argv(args); + + if (conf->log_early && + !cct->_log->is_started()) { + cct->_log->start(); + } + + if (!cct->_log->is_started()) { + cct->_log->start(); + } + + // do the --show-config[-val], if present in argv + conf.do_argv_commands(); + + // Now we're ready to complain about config file parse errors + g_conf().complain_about_parse_error(g_ceph_context); + } +#endif + +#ifdef WITH_JAEGER boost::intrusive_ptr global_init(const std::map *defaults, std::vector < const char* >& args, uint32_t module_type, code_environment_t code_env, int flags, + Jager_Tracer& tracer,const Span& parent_span, const char *data_dir_option, bool run_pre_init) { + Span span=tracer.child_span("global_init.cc global_init()",parent_span); // Ensure we're not calling the global init functions multiple times. static bool first_run = true; if (run_pre_init) { @@ -204,7 +296,248 @@ global_init(const std::map *defaults, // drop privileges? ostringstream priv_ss; - + + // consider --setuser root a no-op, even if we're not root + if (getuid() != 0) { + if (g_conf()->setuser.length()) { + cerr << "ignoring --setuser " << g_conf()->setuser << " since I am not root" + << std::endl; + } + if (g_conf()->setgroup.length()) { + cerr << "ignoring --setgroup " << g_conf()->setgroup + << " since I am not root" << std::endl; + } + } else if (g_conf()->setgroup.length() || + g_conf()->setuser.length()) { + uid_t uid = 0; // zero means no change; we can only drop privs here. 
+ gid_t gid = 0; + std::string uid_string; + std::string gid_string; + std::string home_directory; + if (g_conf()->setuser.length()) { + char buf[4096]; + struct passwd pa; + struct passwd *p = 0; + + uid = atoi(g_conf()->setuser.c_str()); + if (uid) { + getpwuid_r(uid, &pa, buf, sizeof(buf), &p); + } else { + getpwnam_r(g_conf()->setuser.c_str(), &pa, buf, sizeof(buf), &p); + if (!p) { + cerr << "unable to look up user '" << g_conf()->setuser << "'" + << std::endl; + exit(1); + } + + uid = p->pw_uid; + gid = p->pw_gid; + uid_string = g_conf()->setuser; + } + + if (p && p->pw_dir != nullptr) { + home_directory = std::string(p->pw_dir); + } + } + if (g_conf()->setgroup.length() > 0) { + gid = atoi(g_conf()->setgroup.c_str()); + if (!gid) { + char buf[4096]; + struct group gr; + struct group *g = 0; + getgrnam_r(g_conf()->setgroup.c_str(), &gr, buf, sizeof(buf), &g); + if (!g) { + cerr << "unable to look up group '" << g_conf()->setgroup << "'" + << ": " << cpp_strerror(errno) << std::endl; + exit(1); + } + gid = g->gr_gid; + gid_string = g_conf()->setgroup; + } + } + if ((uid || gid) && + g_conf()->setuser_match_path.length()) { + // induce early expansion of setuser_match_path config option + string match_path = g_conf()->setuser_match_path; + g_conf().early_expand_meta(match_path, &cerr); + struct stat st; + int r = ::stat(match_path.c_str(), &st); + if (r < 0) { + cerr << "unable to stat setuser_match_path " + << g_conf()->setuser_match_path + << ": " << cpp_strerror(errno) << std::endl; + exit(1); + } + if ((uid && uid != st.st_uid) || + (gid && gid != st.st_gid)) { + cerr << "WARNING: will not setuid/gid: " << match_path + << " owned by " << st.st_uid << ":" << st.st_gid + << " and not requested " << uid << ":" << gid + << std::endl; + uid = 0; + gid = 0; + uid_string.erase(); + gid_string.erase(); + } else { + priv_ss << "setuser_match_path " + << match_path << " owned by " + << st.st_uid << ":" << st.st_gid << ". 
"; + } + } + g_ceph_context->set_uid_gid(uid, gid); + g_ceph_context->set_uid_gid_strings(uid_string, gid_string); + if ((flags & CINIT_FLAG_DEFER_DROP_PRIVILEGES) == 0) { + if (setgid(gid) != 0) { + cerr << "unable to setgid " << gid << ": " << cpp_strerror(errno) + << std::endl; + exit(1); + } + if (setuid(uid) != 0) { + cerr << "unable to setuid " << uid << ": " << cpp_strerror(errno) + << std::endl; + exit(1); + } + if (setenv("HOME", home_directory.c_str(), 1) != 0) { + cerr << "warning: unable to set HOME to " << home_directory << ": " + << cpp_strerror(errno) << std::endl; + } + priv_ss << "set uid:gid to " << uid << ":" << gid << " (" << uid_string << ":" << gid_string << ")"; + } else { + priv_ss << "deferred set uid:gid to " << uid << ":" << gid << " (" << uid_string << ":" << gid_string << ")"; + } + } + +#if defined(HAVE_SYS_PRCTL_H) + if (prctl(PR_SET_DUMPABLE, 1) == -1) { + cerr << "warning: unable to set dumpable flag: " << cpp_strerror(errno) << std::endl; + } +# if defined(PR_SET_THP_DISABLE) + if (!g_conf().get_val("thp") && prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0) == -1) { + cerr << "warning: unable to disable THP: " << cpp_strerror(errno) << std::endl; + } +# endif +#endif + + // + // Utterly important to run first network connection after setuid(). + // In case of rdma transport uverbs kernel module starts returning + // -EACCESS on each operation if credentials has been changed, see + // callers of ib_safe_file_access() for details. + // + // fork() syscall also matters, so daemonization won't work in case + // of rdma. + // + if (!g_conf()->no_mon_config) { + // make sure our mini-session gets legacy values + g_conf().apply_changes(nullptr); + + MonClient mc_bootstrap(g_ceph_context); + if (mc_bootstrap.get_monmap_and_config() < 0) { + g_ceph_context->_log->flush(); + cerr << "failed to fetch mon config (--no-mon-config to skip)" + << std::endl; + _exit(1); + } + } + + // Expand metavariables. Invoke configuration observers. Open log file. 
+ g_conf().apply_changes(nullptr); + + if (g_conf()->run_dir.length() && + code_env == CODE_ENVIRONMENT_DAEMON && + !(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) { + int r = ::mkdir(g_conf()->run_dir.c_str(), 0755); + if (r < 0 && errno != EEXIST) { + cerr << "warning: unable to create " << g_conf()->run_dir << ": " << cpp_strerror(errno) << std::endl; + } + } + + // call all observers now. this has the side-effect of configuring + // and opening the log file immediately. + g_conf().call_all_observers(); + + if (priv_ss.str().length()) { + dout(0) << priv_ss.str() << dendl; + } + + if ((flags & CINIT_FLAG_DEFER_DROP_PRIVILEGES) && + (g_ceph_context->get_set_uid() || g_ceph_context->get_set_gid())) { + // Fix ownership on log files and run directories if needed. + // Admin socket files are chown()'d during the common init path _after_ + // the service thread has been started. This is sadly a bit of a hack :( + chown_path(g_conf()->run_dir, + g_ceph_context->get_set_uid(), + g_ceph_context->get_set_gid(), + g_ceph_context->get_set_uid_string(), + g_ceph_context->get_set_gid_string()); + g_ceph_context->_log->chown_log_file( + g_ceph_context->get_set_uid(), + g_ceph_context->get_set_gid()); + } + + // Now we're ready to complain about config file parse errors + g_conf().complain_about_parse_error(g_ceph_context); + + // test leak checking + if (g_conf()->debug_deliberately_leak_memory) { + derr << "deliberately leaking some memory" << dendl; + char *s = new char[1234567]; + (void)s; + // cppcheck-suppress memleak + } + + if (code_env == CODE_ENVIRONMENT_DAEMON && !(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) + output_ceph_version(); + + if (g_ceph_context->crush_location.init_on_startup()) { + cerr << " failed to init_on_startup : " << cpp_strerror(errno) << std::endl; + exit(1); + } + + return boost::intrusive_ptr{g_ceph_context, false}; +} +#endif + +boost::intrusive_ptr +global_init(const std::map *defaults, + std::vector < const char* >& args, + uint32_t module_type, 
code_environment_t code_env, + int flags, + const char *data_dir_option, bool run_pre_init) +{ + // Ensure we're not calling the global init functions multiple times. + static bool first_run = true; + if (run_pre_init) { + // We will run pre_init from here (default). + ceph_assert(!g_ceph_context && first_run); + global_pre_init(defaults, args, module_type, code_env, flags); + } else { + // Caller should have invoked pre_init manually. + ceph_assert(g_ceph_context && first_run); + } + first_run = false; + + // Verify flags have not changed if global_pre_init() has been called + // manually. If they have, update them. + if (g_ceph_context->get_init_flags() != flags) { + g_ceph_context->set_init_flags(flags); + } + + // signal stuff + int siglist[] = { SIGPIPE, 0 }; + block_signals(siglist, NULL); + + if (g_conf()->fatal_signal_handlers) { + install_standard_sighandlers(); + } + register_assert_context(g_ceph_context); + + if (g_conf()->log_flush_on_exit) + g_ceph_context->_log->set_flush_on_exit(); + + // drop privileges? 
+ ostringstream priv_ss; + // consider --setuser root a no-op, even if we're not root if (getuid() != 0) { if (g_conf()->setuser.length()) { @@ -578,7 +911,7 @@ int global_init_preload_erasure_code(const CephContext *cct) string plugin_name = *i; string replacement = ""; - if (plugin_name == "jerasure_generic" || + if (plugin_name == "jerasure_generic" || plugin_name == "jerasure_sse3" || plugin_name == "jerasure_sse4" || plugin_name == "jerasure_neon") { diff --git a/src/global/global_init.h b/src/global/global_init.h index d1d6dbbddbfb1..69379369bdc27 100644 --- a/src/global/global_init.h +++ b/src/global/global_init.h @@ -22,6 +22,7 @@ #include "include/ceph_assert.h" #include "common/code_environment.h" #include "common/common_init.h" +#include "include/tracer.h" /* * global_init is the first initialization function that @@ -38,6 +39,18 @@ global_init( const char *data_dir_option = 0, bool run_pre_init = true); +boost::intrusive_ptr +global_init( + const std::map *defaults, + std::vector < const char* >& args, + uint32_t module_type, + code_environment_t code_env, + int flags, + Jager_Tracer&, + const Span&, + const char *data_dir_option = 0, + bool run_pre_init = true); + namespace TOPNSPC::common { void intrusive_ptr_add_ref(CephContext* cct); void intrusive_ptr_release(CephContext* cct); @@ -49,6 +62,10 @@ void global_pre_init(const std::map *defaults, std::vector < const char* >& args, uint32_t module_type, code_environment_t code_env, int flags); +void global_pre_init(const std::map *defaults, + std::vector < const char* >& args, + uint32_t module_type, code_environment_t code_env, + int flags,Jager_Tracer&,const Span&); /* * perform all 
of the steps that global_init_daemonize performs just prior @@ -70,7 +87,7 @@ void global_init_postfork_finish(CephContext *cct); /* - * global_init_daemonize handles daemonizing a process. + * global_init_daemonize handles daemonizing a process. * * If this is called, it *must* be called before common_init_finish. * Note that this is equivalent to calling _prefork(), daemon(), and diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc index c3f17f632ec38..15e2c5dda7f64 100644 --- a/src/global/signal_handler.cc +++ b/src/global/signal_handler.cc @@ -35,8 +35,8 @@ #include #include "common/errno.h" #if defined(_AIX) -extern char *sys_siglist[]; -#endif +extern char *sys_siglist[]; +#endif #define dout_context g_ceph_context @@ -374,8 +374,8 @@ string get_name_by_pid(pid_t pid) if (fd < 0) { fd = -errno; - derr << "Fail to open '" << proc_pid_path - << "' error = " << cpp_strerror(fd) + derr << "Fail to open '" << proc_pid_path + << "' error = " << cpp_strerror(fd) << dendl; return ""; } @@ -395,7 +395,7 @@ string get_name_by_pid(pid_t pid) return string(buf, ret); } #endif - + /** * safe async signal handler / dispatcher * @@ -422,7 +422,7 @@ struct SignalHandler : public Thread { safe_handler() { memset(pipefd, 0, sizeof(pipefd)); memset(&handler, 0, sizeof(handler)); - memset(&info_t, 0, sizeof(info_t)); + memset(&info_t, 0, sizeof(info_t)); } siginfo_t info_t; @@ -531,7 +531,7 @@ struct SignalHandler : public Thread { } } lock.unlock(); - } + } } return NULL; } @@ -587,7 +587,7 @@ void SignalHandler::register_handler(int signum, signal_handler_t handler, bool // signal thread so that it sees our new handler signal_thread(); - + // install our handler struct sigaction oldact; struct sigaction act; @@ -623,7 +623,14 @@ void SignalHandler::unregister_handler(int signum, signal_handler_t handler) // ------- - +#ifdef WITH_JAEGER +void init_async_signal_handler(Jager_Tracer& tracer,const Span& parent_span) + { + Span 
span=tracer.child_span("signal_handler.cc init_async_signal_handler()",parent_span); + ceph_assert(!g_signal_handler); + g_signal_handler = new SignalHandler; + } +#endif void init_async_signal_handler() { ceph_assert(!g_signal_handler); @@ -642,7 +649,14 @@ void queue_async_signal(int signum) ceph_assert(g_signal_handler); g_signal_handler->queue_signal(signum); } - +#ifdef WITH_JAEGER +void register_async_signal_handler(int signum, signal_handler_t handler,Jager_Tracer& tracer,const Span& parent_span) + { + Span span=tracer.child_span("signal_handler.cc register_async_signal_handler()",parent_span); + ceph_assert(g_signal_handler); + g_signal_handler->register_handler(signum, handler, false); + } +#endif void register_async_signal_handler(int signum, signal_handler_t handler) { ceph_assert(g_signal_handler); @@ -660,6 +674,3 @@ void unregister_async_signal_handler(int signum, signal_handler_t handler) ceph_assert(g_signal_handler); g_signal_handler->unregister_handler(signum, handler); } - - - diff --git a/src/global/signal_handler.h b/src/global/signal_handler.h index 476724201aa99..b3247a4b28b86 100644 --- a/src/global/signal_handler.h +++ b/src/global/signal_handler.h @@ -17,6 +17,7 @@ #include #include "acconfig.h" +#include "include/tracer.h" typedef void (*signal_handler_t)(int); @@ -37,6 +38,7 @@ void install_standard_sighandlers(void); /// initialize async signal handler framework void init_async_signal_handler(); +void init_async_signal_handler(Jager_Tracer&,const Span&); /// shutdown async signal handler framework void shutdown_async_signal_handler(); @@ -46,6 +48,8 @@ void queue_async_signal(int signum); /// install a safe, async, callback for the given signal void register_async_signal_handler(int signum, signal_handler_t handler); +void register_async_signal_handler(int signum, signal_handler_t handler,Jager_Tracer& tracer,const Span& parent_span); + void register_async_signal_handler_oneshot(int signum, signal_handler_t handler); /// uninstall a 
safe async signal callback diff --git a/src/include/tracer.h b/src/include/tracer.h new file mode 100644 index 0000000000000..98c0d8c420cfc --- /dev/null +++ b/src/include/tracer.h @@ -0,0 +1,64 @@ +#ifndef TRACER_H_ +#define TRACER_H_ + +#define SIGNED_RIGHT_SHIFT_IS 1 +#define ARITHMETIC_RIGHT_SHIFT 1 + +#include +#include +#include + +#include + +#ifndef WITH_JAEGER + #define WITH_JAEGER +#endif + +typedef std::unique_ptr Span; + +class Jager_Tracer{ + public: + Jager_Tracer(){} + ~Jager_Tracer(){ + if(this->tracer == NULL) + return; + if(!this->isTracerClosed) + this->tracer->Close(); + this->isTracerClosed=true; + } + + void init_tracer(const char* tracerName,const char* filePath){ + auto yaml = YAML::LoadFile(filePath); + auto configuration = jaegertracing::Config::parse(yaml); + this->isTracerClosed=false; + this->tracer = jaegertracing::Tracer::make( + tracerName, + configuration, + jaegertracing::logging::consoleLogger()); + opentracing::Tracer::InitGlobal( + std::static_pointer_cast(tracer)); + } + inline void finish_tracer(){ + if(!this->isTracerClosed){ + this->isTracerClosed=true; + this->tracer->Close(); + } + } + Span new_span(const char* spanName){ + Span span=opentracing::Tracer::Global()->StartSpan(spanName); + return std::move(span); + } + Span child_span(const char* spanName,const Span& parentSpan){ + Span span = opentracing::Tracer::Global()->StartSpan(spanName, {opentracing::ChildOf(&parentSpan->context())}); + return std::move(span); + } + Span followup_span(const char *spanName, const Span& parentSpan){ + Span span = opentracing::Tracer::Global()->StartSpan(spanName, {opentracing::FollowsFrom(&parentSpan->context())}); + return std::move(span); + } +private: + std::shared_ptr tracer = NULL; + bool isTracerClosed; +}; + +#endif diff --git a/src/osd/CMakeLists.txt b/src/osd/CMakeLists.txt index c993ac2ff1b64..542303e1302bd 100644 --- a/src/osd/CMakeLists.txt +++ b/src/osd/CMakeLists.txt @@ -46,7 +46,8 @@ endif() add_library(osd STATIC 
${osd_srcs}) target_link_libraries(osd PUBLIC dmclock::dmclock - PRIVATE os heap_profiler cpu_profiler ${CMAKE_DL_LIBS}) + PRIVATE os heap_profiler cpu_profiler ${CMAKE_DL_LIBS} + ${Complete_Jaeger_LIBRARIES}) if(WITH_LTTNG) add_dependencies(osd osd-tp pg-tp) endif() diff --git a/src/rgw/Config.yaml b/src/rgw/Config.yaml new file mode 100644 index 0000000000000..340eb511031eb --- /dev/null +++ b/src/rgw/Config.yaml @@ -0,0 +1,6 @@ +disabled: false +reporter: + logSpans: true +sampler: + type: const + param: 1 diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h index aeb30e2f99c99..c00c5b6b46cb6 100644 --- a/src/rgw/rgw_acl.h +++ b/src/rgw/rgw_acl.h @@ -12,6 +12,7 @@ #include #include "common/debug.h" +#include "include/tracer.h" #include "rgw_basic_types.h" @@ -433,6 +434,13 @@ class RGWAccessControlPolicy encode(acl, bl); ENCODE_FINISH(bl); } + void encode(bufferlist& bl, Jager_Tracer& tracer, const Span& parent_span) const { + Span span = tracer.child_span("rgw_acl.h encode", parent_span); + ENCODE_START(2, 2, bl); + encode(owner, bl); + encode(acl, bl); + ENCODE_FINISH(bl); + } void decode(bufferlist::const_iterator& bl) { DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); decode(owner, bl); @@ -457,6 +465,14 @@ class RGWAccessControlPolicy owner.set_id(id); owner.set_name(name); } + + void create_default(const rgw_user& id, string& name, Jager_Tracer& tracer, const Span& parent_span) { + Span span = tracer.child_span("rgw_acl.h create_default", parent_span); + acl.create_default(id, name); + owner.set_id(id); + owner.set_name(name); + } + RGWAccessControlList& get_acl() { return acl; } diff --git a/src/rgw/rgw_aio.h b/src/rgw/rgw_aio.h index c30de75ee2856..d841a3f045e42 100644 --- a/src/rgw/rgw_aio.h +++ b/src/rgw/rgw_aio.h @@ -26,6 +26,7 @@ #include "services/svc_rados.h" // cant forward declare RGWSI_RADOS::Obj #include "rgw_common.h" +#include "include/tracer.h" #include "include/function2.hpp" diff --git a/src/rgw/rgw_aio_throttle.h b/src/rgw/rgw_aio_throttle.h 
index 764469d7ed38b..5410a49b1dd42 100644 --- a/src/rgw/rgw_aio_throttle.h +++ b/src/rgw/rgw_aio_throttle.h @@ -132,4 +132,10 @@ inline auto make_throttle(uint64_t window_size, optional_yield y) #endif } +inline auto make_throttle(uint64_t window_size, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_aio_throttle.h make_throttle", parent_span); + return make_throttle(window_size, y); +} + } // namespace rgw diff --git a/src/rgw/rgw_asio_frontend.cc b/src/rgw/rgw_asio_frontend.cc index 4fca9aece6637..95c7d7783506b 100644 --- a/src/rgw/rgw_asio_frontend.cc +++ b/src/rgw/rgw_asio_frontend.cc @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -14,6 +16,7 @@ #include "common/async/shared_mutex.h" #include "common/errno.h" #include "common/strtol.h" +#include "include/tracer.h" #include "rgw_asio_client.h" #include "rgw_asio_frontend.h" @@ -47,6 +50,10 @@ auto make_stack_allocator() { return boost::context::protected_fixedsize_stack{512*1024}; } +std::once_flag tracerInit; +Jager_Tracer tracer; +// Span root_span = nullptr; + template class StreamIO : public rgw::asio::ClientIO { CephContext* const cct; @@ -106,6 +113,10 @@ void handle_connection(boost::asio::io_context& context, boost::system::error_code& ec, spawn::yield_context yield) { + std::call_once(tracerInit,[](){ + tracer.init_tracer("RGW_Client_Process","/home/abhinav/GSOC/ceph/src/tracerConfig.yaml"); + tracer_2.init_tracer("RGW_Client_Process","/home/abhinav/GSOC/ceph/src/tracerConfig.yaml"); + }); // limit header to 4k, since we read it all into a single flat_buffer static constexpr size_t header_limit = 4096; // don't impose a limit on the body, since we read it in pieces @@ -130,6 +141,7 @@ void handle_connection(boost::asio::io_context& context, #endif ec == http::error::end_of_stream) { ldout(cct, 20) << "failed to read header: " << ec.message() << dendl; + // root_span=nullptr; return; } if (ec) { @@ -144,6 +156,7 
@@ void handle_connection(boost::asio::io_context& context, ldout(cct, 5) << "failed to write response: " << ec.message() << dendl; } ldout(cct, 1) << "====== req done http_status=400 ======" << dendl; + // root_span=nullptr; return; } @@ -177,8 +190,23 @@ void handle_connection(boost::asio::io_context& context, &real_client)))); RGWRestfulIO client(cct, &real_client_io); auto y = optional_yield{context, yield}; - process_request(env.store, env.rest, &req, env.uri_prefix, - *env.auth_registry, &client, env.olog, y, scheduler); + #ifdef WITH_JAEGER + std::string span_name; + auto time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + span_name = std::ctime(&time); + span_name = "rgw_asio_frontend "+span_name; + Span root_span=tracer.new_span(span_name.c_str()); + // global_root_span = tracer_2.new_span(span_name.c_str()); + // span_structure s1; + // s1.set_span(s1,&req); + // if(root_span == nullptr) + // root_span=tracer.new_span(span_name.c_str()); + process_request(env.store, env.rest, &req, env.uri_prefix, + *env.auth_registry, &client, env.olog, y, tracer, std::move(root_span), scheduler); + #else + process_request(env.store, env.rest, &req, env.uri_prefix, + *env.auth_registry, &client, env.olog, y, scheduler); + #endif } if (!parser.keep_alive()) { diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 108b1c13e2d1c..fea5685d2de74 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -163,6 +163,20 @@ int rgw_read_user_buckets(rgw::sal::RGWRadosStore * store, return user.list_buckets(marker, end_marker, max, need_stats, buckets); } +int rgw_read_user_buckets(rgw::sal::RGWRadosStore * store, + const rgw_user& user_id, + rgw::sal::RGWBucketList& buckets, + const string& marker, + const string& end_marker, + uint64_t max, + Jager_Tracer& tracer,const Span& parent_span, + bool need_stats) +{ + Span span=tracer.child_span("rgw_bucket.cc rgw_read_user_buckets",parent_span); + rgw::sal::RGWRadosUser user(store, 
user_id); + return user.list_buckets(marker, end_marker, max, need_stats, buckets); +} + int rgw_bucket_parse_bucket_instance(const string& bucket_instance, string *bucket_name, string *bucket_id, int *shard_id) { auto pos = bucket_instance.rfind(':'); @@ -3181,6 +3195,15 @@ int RGWBucketCtl::remove_bucket_entrypoint_info(const rgw_bucket& bucket, }); } +int RGWBucketCtl::remove_bucket_entrypoint_info(const rgw_bucket& bucket, + optional_yield y, + Jager_Tracer& tracer, const Span& parent_span, + const Bucket::RemoveParams& param) +{ + Span span = tracer.child_span("rgw_bucket.cc", parent_span); + return RGWBucketCtl::remove_bucket_entrypoint_info(bucket, y, param); +} + int RGWBucketCtl::read_bucket_instance_info(const rgw_bucket& bucket, RGWBucketInfo *info, optional_yield y, @@ -3288,6 +3311,16 @@ int RGWBucketCtl::remove_bucket_instance_info(const rgw_bucket& bucket, optional_yield y, const BucketInstance::RemoveParams& params) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_bucket.cc RGWBucketCtl::remove_bucket_instance_info", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_bucket.cc RGWBucketCtl::remove_bucket_instance_info"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (params.objv_tracker) { info.objv_tracker = *params.objv_tracker; } @@ -3301,6 +3334,16 @@ int RGWBucketCtl::remove_bucket_instance_info(const rgw_bucket& bucket, }); } +int RGWBucketCtl::remove_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + Jager_Tracer& tracer, const Span& parent_span, + const BucketInstance::RemoveParams& params) +{ + Span span = tracer.child_span("rgw_bucket.cc RGWBucketCtl::remove_bucket_instance_info", parent_span); + return RGWBucketCtl::remove_bucket_instance_info(bucket, info, y, params); +} + int RGWBucketCtl::do_store_linked_bucket_info(RGWSI_Bucket_X_Ctx& ctx, 
RGWBucketInfo& info, RGWBucketInfo *orig_info, @@ -3433,6 +3476,17 @@ int RGWBucketCtl::link_bucket(const rgw_user& user_id, }); } +int RGWBucketCtl::link_bucket(const rgw_user& user_id, + const rgw_bucket& bucket, + ceph::real_time creation_time, + optional_yield y, Jager_Tracer& tracer, const Span& parent_span, + bool update_entrypoint, + rgw_ep_info *pinfo) +{ + Span span = tracer.child_span("RGWBucketCtl::link_bucket", parent_span); + return RGWBucketCtl::link_bucket(user_id, bucket, creation_time, y, update_entrypoint, pinfo); +} + int RGWBucketCtl::do_link_bucket(RGWSI_Bucket_EP_Ctx& ctx, const rgw_user& user_id, const rgw_bucket& bucket, @@ -3502,11 +3556,27 @@ int RGWBucketCtl::do_link_bucket(RGWSI_Bucket_EP_Ctx& ctx, int RGWBucketCtl::unlink_bucket(const rgw_user& user_id, const rgw_bucket& bucket, optional_yield y, bool update_entrypoint) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_bucket.cc RGWBucketCtl::unlink_bucket", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_bucket.cc RGWBucketCtl::unlink_bucket"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) { return do_unlink_bucket(ctx, user_id, bucket, y, update_entrypoint); }); } +int RGWBucketCtl::unlink_bucket(const rgw_user& user_id, const rgw_bucket& bucket, optional_yield y, Jager_Tracer& tracer, const Span& parent_span, bool update_entrypoint) +{ + Span span = tracer.child_span("rgw_bucket.cc RGWBucketCtl::unlink_bucket", parent_span); + return RGWBucketCtl::unlink_bucket(user_id, bucket, y, update_entrypoint); +} + int RGWBucketCtl::do_unlink_bucket(RGWSI_Bucket_EP_Ctx& ctx, const rgw_user& user_id, const rgw_bucket& bucket, @@ -3678,6 +3748,16 @@ int RGWBucketCtl::sync_user_stats(const rgw_user& user_id, const RGWBucketInfo& bucket_info, RGWBucketEnt* pent) { + span_structure ss; + 
#ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_bucket.cc RGWBucketCtl::sync_user_stats", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_bucket.cc RGWBucketCtl::sync_user_stats"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif RGWBucketEnt ent; if (!pent) { pent = &ent; @@ -3691,6 +3771,25 @@ int RGWBucketCtl::sync_user_stats(const rgw_user& user_id, return ctl.user->flush_bucket_stats(user_id, *pent); } +int RGWBucketCtl::sync_user_stats(const rgw_user& user_id, + const RGWBucketInfo& bucket_info, + Jager_Tracer& tracer, const Span& parent_span, + RGWBucketEnt* pent) +{ + Span span = tracer.child_span("rgw_bucket.cc RGWBucketCtl::sync_user_stats", parent_span); + RGWBucketEnt ent; + if (!pent) { + pent = &ent; + } + int r = svc.bi->read_stats(bucket_info, pent, null_yield, tracer, span); + if (r < 0) { + ldout(cct, 20) << __func__ << "(): failed to read bucket stats (r=" << r << ")" << dendl; + return r; + } + + return ctl.user->flush_bucket_stats(user_id, *pent); +} + int RGWBucketCtl::get_sync_policy_handler(std::optional zone, std::optional bucket, RGWBucketSyncPolicyHandlerRef *phandler, diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h index d30f410fddcf5..716870617b012 100644 --- a/src/rgw/rgw_bucket.h +++ b/src/rgw/rgw_bucket.h @@ -8,6 +8,8 @@ #include #include "include/types.h" +#include "include/tracer.h" + #include "rgw_common.h" #include "rgw_tools.h" #include "rgw_metadata.h" @@ -224,6 +226,14 @@ extern int rgw_read_user_buckets(rgw::sal::RGWRadosStore *store, const string& end_marker, uint64_t max, bool need_stats); +extern int rgw_read_user_buckets(rgw::sal::RGWRadosStore *store, + const rgw_user& user_id, + rgw::sal::RGWBucketList& buckets, + const string& marker, + const string& end_marker, + uint64_t max, + Jager_Tracer&,const Span&, + bool need_stats); extern int rgw_remove_object(rgw::sal::RGWRadosStore 
*store, const RGWBucketInfo& bucket_info, const rgw_bucket& bucket, rgw_obj_key& key); extern int rgw_remove_bucket_bypass_gc(rgw::sal::RGWRadosStore *store, rgw_bucket& bucket, int concurrent_max, optional_yield y); @@ -829,6 +839,10 @@ class RGWBucketCtl int remove_bucket_entrypoint_info(const rgw_bucket& bucket, optional_yield y, const Bucket::RemoveParams& params = {}); + int remove_bucket_entrypoint_info(const rgw_bucket& bucket, + optional_yield y, + Jager_Tracer&, const Span&, + const Bucket::RemoveParams& params = {}); /* bucket instance */ int read_bucket_instance_info(const rgw_bucket& bucket, @@ -843,6 +857,11 @@ class RGWBucketCtl RGWBucketInfo& info, optional_yield y, const BucketInstance::RemoveParams& params = {}); + int remove_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + Jager_Tracer&, const Span&, + const BucketInstance::RemoveParams& params = {}); /* * bucket_id may or may not be provided @@ -869,11 +888,21 @@ class RGWBucketCtl optional_yield y, bool update_entrypoint = true, rgw_ep_info *pinfo = nullptr); + int link_bucket(const rgw_user& user_id, + const rgw_bucket& bucket, + ceph::real_time creation_time, + optional_yield y, Jager_Tracer&, const Span&, + bool update_entrypoint = true, + rgw_ep_info *pinfo = nullptr); int unlink_bucket(const rgw_user& user_id, const rgw_bucket& bucket, optional_yield y, bool update_entrypoint = true); + int unlink_bucket(const rgw_user& user_id, + const rgw_bucket& bucket, + optional_yield y, Jager_Tracer&, const Span&, + bool update_entrypoint = true); int chown(rgw::sal::RGWRadosStore *store, RGWBucketInfo& bucket_info, const rgw_user& user_id, const std::string& display_name, @@ -892,6 +921,9 @@ class RGWBucketCtl /* quota related */ int sync_user_stats(const rgw_user& user_id, const RGWBucketInfo& bucket_info, RGWBucketEnt* pent = nullptr); + int sync_user_stats(const rgw_user& user_id, const RGWBucketInfo& bucket_info, + Jager_Tracer&, const Span&, + 
RGWBucketEnt* pent = nullptr); /* bucket sync */ int get_sync_policy_handler(std::optional zone, diff --git a/src/rgw/rgw_civetweb_frontend.cc b/src/rgw/rgw_civetweb_frontend.cc index 4656adb58df5c..70eb0454bc7eb 100644 --- a/src/rgw/rgw_civetweb_frontend.cc +++ b/src/rgw/rgw_civetweb_frontend.cc @@ -9,6 +9,10 @@ #include "rgw_frontend.h" #include "rgw_client_io_filters.h" #include "rgw_dmclock_sync_scheduler.h" +#include "include/tracer.h" + +#include +#include #define dout_subsys ceph_subsys_rgw @@ -68,9 +72,22 @@ int RGWCivetWebFrontend::process(struct mg_connection* const conn) RGWRequest req(env.store->getRados()->get_new_req_id()); int http_ret = 0; //assert (scheduler != nullptr); - int ret = process_request(env.store, env.rest, &req, env.uri_prefix, - *env.auth_registry, &client_io, env.olog, - null_yield, scheduler.get() ,&http_ret); + int ret; + #ifdef WITH_JAEGER + Jager_Tracer tracer; + std::string tracerName; + auto time=std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + tracerName = std::ctime(&time); + tracer.init_tracer(tracerName.c_str(),"/home/abhinav/GSOC/ceph/src/tracerConfig.yaml"); + Span span=tracer.new_span("RootSpan"); + ret = process_request(env.store, env.rest, &req, env.uri_prefix, + *env.auth_registry, &client_io, env.olog, + null_yield, tracer, span, scheduler.get() ,&http_ret); + #else + ret = process_request(env.store, env.rest, &req, env.uri_prefix, + *env.auth_registry, &client_io, env.olog, + null_yield, tracer, scheduler.get() ,&http_ret); + #endif if (ret < 0) { /* We don't really care about return code. 
*/ dout(20) << "process_request() returned " << ret << dendl; diff --git a/src/rgw/rgw_client_io.h b/src/rgw/rgw_client_io.h index 3331bacddbbc0..a33f3d690e070 100644 --- a/src/rgw/rgw_client_io.h +++ b/src/rgw/rgw_client_io.h @@ -14,6 +14,7 @@ #include #include "include/types.h" +#include "include/tracer.h" #include "rgw_common.h" diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index f440bb2261227..8afda205cfce5 100644 --- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -1113,6 +1113,15 @@ bool verify_user_permission(const DoutPrefixProvider* dpp, return verify_user_permission(dpp, &ps, s->user_acl.get(), s->iam_user_policies, res, op); } +bool verify_user_permission(const DoutPrefixProvider* dpp, + struct req_state * const s, + const rgw::ARN& res, + const uint64_t op, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_common.cc verify_user_permission", parent_span); + return verify_user_permission(dpp, s, res, op); +} + bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state * const s, const int perm) @@ -1239,8 +1248,24 @@ bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, struct re perm); } +bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state * const s, const int perm, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_common.cc verify_bucket_permission_no_policy", parent_span); + return verify_bucket_permission_no_policy(dpp, s, perm); +} + bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct req_state * const s, const uint64_t op) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_common.cc verify_bucket_permission", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_common.cc verify_bucket_permission"); + ss.set_req_state(global_state); + 
ss.set_span(span); + #endif perm_state_from_req_state ps(s); return verify_bucket_permission(dpp, @@ -1252,6 +1277,11 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct req_state * s->iam_user_policies, op); } +bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct req_state * const s, const uint64_t op, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_common.cc verify_bucket_permission", parent_span); + return verify_bucket_permission(dpp, s, op); +} // Authorize anyone permitted by the policy and the bucket owner // unless explicitly denied by the policy. @@ -1468,6 +1498,22 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s op); } +bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s, uint64_t op, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span=tracer.child_span("rgw_common.cc verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s, uint64_t op)",parent_span); + perm_state_from_req_state ps(s); + + return verify_object_permission(dpp, + &ps, + rgw_obj(s->bucket, s->object), + s->user_acl.get(), + s->bucket_acl.get(), + s->object_acl.get(), + s->iam_policy, + s->iam_user_policies, + op); +} + class HexTable { char table[256]; @@ -2116,4 +2162,138 @@ bool RGWBucketInfo::empty_sync_policy() const return sync_policy->empty(); } - +Jager_Tracer tracer_2; +// std::stack stack_span; + + +// void push_to_stack_span(const Span& span){ +// st.push(std::move(span)); +// } +// void pop_from_stack_span(){ +// if(!stack_span.empty()) +// st.pop(); +// } +// std::mutex mut1; +req_state* global_state = nullptr; +void span_structure::set_span(Span& span){ + // mut1.lock(); + this->s->stack_span.push(std::move(span)); + // mut1.unlock(); +} +void span_structure::set_req_state(req_state* s){ + // mut1.lock(); + this->s = s; + // mut1.unlock(); +} +span_structure::~span_structure(){ + // mut1.lock(); + 
if(this->s!=nullptr && !this->s->stack_span.empty()) + this->s->stack_span.pop(); + // mut1.unlock(); +} + + +std::unordered_map RGWOpTypeMapper={ + {0,"RGW_OP_UNKNOWN"}, + {1,"RGW_OP_GET_OBJ"}, + {2,"RGW_OP_LIST_BUCKETS"}, + {3,"RGW_OP_STAT_ACCOUNT"}, + {4,"RGW_OP_LIST_BUCKET"}, + {5,"RGW_OP_GET_BUCKET_LOGGING"}, + {6,"RGW_OP_GET_BUCKET_LOCATION"}, + {7,"RGW_OP_GET_BUCKET_VERSIONING"}, + {8,"RGW_OP_SET_BUCKET_VERSIONING"}, + {9,"RGW_OP_GET_BUCKET_WEBSITE"}, + {10,"RGW_OP_SET_BUCKET_WEBSITE"}, + {11,"RGW_OP_STAT_BUCKET"}, + {12,"RGW_OP_CREATE_BUCKET"}, + {13,"RGW_OP_DELETE_BUCKET"}, + {14,"RGW_OP_PUT_OBJ"}, + {15,"RGW_OP_STAT_OBJ"}, + {16,"RGW_OP_POST_OBJ"}, + {17,"RGW_OP_PUT_METADATA_ACCOUNT"}, + {18,"RGW_OP_PUT_METADATA_BUCKET"}, + {19,"RGW_OP_PUT_METADATA_OBJECT"}, + {20,"RGW_OP_SET_TEMPURL"}, + {21,"RGW_OP_DELETE_OBJ"}, + {22,"RGW_OP_COPY_OBJ"}, + {23,"RGW_OP_GET_ACLS"}, + {24,"RGW_OP_PUT_ACLS"}, + {25,"RGW_OP_GET_CORS"}, + {26,"RGW_OP_PUT_CORS"}, + {27,"RGW_OP_DELETE_CORS"}, + {28,"RGW_OP_OPTIONS_CORS"}, + {29,"RGW_OP_GET_REQUEST_PAYMENT"}, + {30,"RGW_OP_SET_REQUEST_PAYMENT"}, + {31,"RGW_OP_INIT_MULTIPART"}, + {32,"RGW_OP_COMPLETE_MULTIPART"}, + {33,"RGW_OP_ABORT_MULTIPART"}, + {34,"RGW_OP_LIST_MULTIPART"}, + {35,"RGW_OP_LIST_BUCKET_MULTIPARTS"}, + {36,"RGW_OP_DELETE_MULTI_OBJ"}, + {37,"RGW_OP_BULK_DELETE"}, + {38,"RGW_OP_SET_ATTRS"}, + {39,"RGW_OP_GET_CROSS_DOMAIN_POLICY"}, + {40,"RGW_OP_GET_HEALTH_CHECK"}, + {41,"RGW_OP_GET_INFO"}, + {42,"RGW_OP_CREATE_ROLE"}, + {43,"RGW_OP_DELETE_ROLE"}, + {44,"RGW_OP_GET_ROLE"}, + {45,"RGW_OP_MODIFY_ROLE"}, + {46,"RGW_OP_LIST_ROLES"}, + {47,"RGW_OP_PUT_ROLE_POLICY"}, + {48,"RGW_OP_GET_ROLE_POLICY"}, + {49,"RGW_OP_LIST_ROLE_POLICIES"}, + {50,"RGW_OP_DELETE_ROLE_POLICY"}, + {51,"RGW_OP_PUT_BUCKET_POLICY"}, + {52,"RGW_OP_GET_BUCKET_POLICY"}, + {53,"RGW_OP_DELETE_BUCKET_POLICY"}, + {54,"RGW_OP_PUT_OBJ_TAGGING"}, + {55,"RGW_OP_GET_OBJ_TAGGING"}, + {56,"RGW_OP_DELETE_OBJ_TAGGING"}, + {57,"RGW_OP_PUT_LC"}, + {58,"RGW_OP_GET_LC"}, 
+ {59,"RGW_OP_DELETE_LC"}, + {60,"RGW_OP_PUT_USER_POLICY"}, + {61,"RGW_OP_GET_USER_POLICY"}, + {62,"RGW_OP_LIST_USER_POLICIES"}, + {63,"RGW_OP_DELETE_USER_POLICY"}, + {64,"RGW_OP_PUT_BUCKET_OBJ_LOCK"}, + {65,"RGW_OP_GET_BUCKET_OBJ_LOCK"}, + {66,"RGW_OP_PUT_OBJ_RETENTION"}, + {67,"RGW_OP_GET_OBJ_RETENTION"}, + {68,"RGW_OP_PUT_OBJ_LEGAL_HOLD"}, + {69,"RGW_OP_GET_OBJ_LEGAL_HOLD"}, + {70,"RGW_OP_ADMIN_SET_METADATA"}, + {71,"RGW_OP_GET_OBJ_LAYOUT"}, + {72,"RGW_OP_BULK_UPLOAD"}, + {73,"RGW_OP_METADATA_SEARCH"}, + {74,"RGW_OP_CONFIG_BUCKET_META_SEARCH"}, + {75,"RGW_OP_GET_BUCKET_META_SEARCH"}, + {76,"RGW_OP_DEL_BUCKET_META_SEARCH"}, + {77,"RGW_STS_ASSUME_ROLE"}, + {78,"RGW_STS_GET_SESSION_TOKEN"}, + {79,"RGW_STS_ASSUME_ROLE_WEB_IDENTITY"}, + {80,"RGW_OP_PUBSUB_TOPIC_CREATE"}, + {81,"RGW_OP_PUBSUB_TOPICS_LIST"}, + {82,"RGW_OP_PUBSUB_TOPIC_GET"}, + {83,"RGW_OP_PUBSUB_TOPIC_DELETE"}, + {84,"RGW_OP_PUBSUB_SUB_CREATE"}, + {85,"RGW_OP_PUBSUB_SUB_GET"}, + {86,"RGW_OP_PUBSUB_SUB_DELETE"}, + {87,"RGW_OP_PUBSUB_SUB_PULL"}, + {88,"RGW_OP_PUBSUB_SUB_ACK"}, + {89,"RGW_OP_PUBSUB_NOTIF_CREATE"}, + {90,"RGW_OP_PUBSUB_NOTIF_DELETE"}, + {91,"RGW_OP_PUBSUB_NOTIF_LIST"}, + {92,"RGW_OP_GET_BUCKET_TAGGING"}, + {93,"RGW_OP_PUT_BUCKET_TAGGING"}, + {94,"RGW_OP_DELETE_BUCKET_TAGGING"}, + {95,"RGW_OP_GET_BUCKET_REPLICATION"}, + {96,"RGW_OP_PUT_BUCKET_REPLICATION"}, + {97,"RGW_OP_DELETE_BUCKET_REPLICATION"}, + {98,"RGW_OP_GET_BUCKET_POLICY_STATUS"}, + {99,"RGW_OP_PUT_BUCKET_PUBLIC_ACCESS_BLOCK"}, + {100,"RGW_OP_GET_BUCKET_PUBLIC_ACCESS_BLOCK"}, + {101,"RGW_OP_DELETE_BUCKET_PUBLIC_ACCESS_BLOCK"} +}; \ No newline at end of file diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 8d840731fe07e..eb639ee878ede 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -23,6 +23,7 @@ #include "common/ceph_crypto.h" #include "common/random_string.h" +#include "include/tracer.h" #include "rgw_acl.h" #include "rgw_cors.h" #include "rgw_iam_policy.h" @@ -276,6 +277,15 @@ enum HostStyle { 
VirtualStyle = 1, }; +/** store the span_structure responsible for storing the spans in stack_span in req_state */ +struct span_structure{ + req_state* s = nullptr; + void set_req_state(req_state* s); + void set_span(Span& span); + ~span_structure(); +}; + + /** Store error returns for output at a different point in the program */ struct rgw_err { rgw_err(); @@ -1644,6 +1654,9 @@ struct req_state : DoutPrefixProvider { ACLOwner bucket_owner; ACLOwner owner; + std::stack stack_span; + + span_structure ss; string zonegroup_name; string zonegroup_endpoint; string bucket_instance_id; @@ -1994,6 +2007,14 @@ static inline void buf_to_hex(const unsigned char* const buf, } } +static inline void buf_to_hex(const unsigned char* const buf, + const size_t len, + char* const str, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_common.h inline void buf_to_hex", parent_span); + buf_to_hex(buf, len, str); +} + template static inline std::array buf_to_hex(const std::array& buf) { @@ -2226,6 +2247,10 @@ bool verify_user_permission(const DoutPrefixProvider* dpp, struct req_state * const s, const rgw::ARN& res, const uint64_t op); +bool verify_user_permission(const DoutPrefixProvider* dpp, + struct req_state * const s, + const rgw::ARN& res, + const uint64_t op, Jager_Tracer& tracer, const Span& parent_span); bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state * const s, int perm); @@ -2239,6 +2264,7 @@ bool verify_bucket_permission( const vector& user_policies, const uint64_t op); bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct req_state * const s, const uint64_t op); +bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct req_state * const s, const uint64_t op, Jager_Tracer&, const Span&); bool verify_bucket_permission_no_policy( const DoutPrefixProvider* dpp, struct req_state * const s, @@ -2248,6 +2274,9 @@ bool verify_bucket_permission_no_policy( bool 
verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state * const s, const int perm); +bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, + struct req_state * const s, + const int perm, Jager_Tracer&, const Span&); int verify_bucket_owner_or_policy(struct req_state* const s, const uint64_t op); extern bool verify_object_permission( @@ -2261,6 +2290,7 @@ extern bool verify_object_permission( const vector& user_policies, const uint64_t op); extern bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s, uint64_t op); +extern bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s, uint64_t op, Jager_Tracer&, const Span&); extern bool verify_object_permission_no_policy( const DoutPrefixProvider* dpp, struct req_state * const s, @@ -2366,6 +2396,15 @@ extern bool match_policy(boost::string_view pattern, boost::string_view input, extern string camelcase_dash_http_attr(const string& orig); extern string lowercase_dash_http_attr(const string& orig); +extern std::unordered_map RGWOpTypeMapper; +extern Jager_Tracer tracer_2; +extern Span global_root_span; +extern req_state* global_state; +// std::stack stack_span; + +// void push_to_stack_span(const Span&); +// void pop_from_stack_span(const Span&); + void rgw_setup_saved_curl_handles(); void rgw_release_all_curl_handles(); diff --git a/src/rgw/rgw_compression.cc b/src/rgw/rgw_compression.cc index 7db8108d69582..3cdd9a3b3340a 100644 --- a/src/rgw/rgw_compression.cc +++ b/src/rgw/rgw_compression.cc @@ -30,6 +30,13 @@ int rgw_compression_info_from_attrset(map& attrs, } } +int rgw_compression_info_from_attrset(map& attrs, + bool& need_decompress, + RGWCompressionInfo& cs_info, Jager_Tracer& tracer, const Span& parent_span) { + Span span = tracer.child_span("rgw_compression.cc rgw_compression_info_from_attrset", parent_span); + return rgw_compression_info_from_attrset(attrs, need_decompress, cs_info); +} + 
//------------RGWPutObj_Compress--------------- int RGWPutObj_Compress::process(bufferlist&& in, uint64_t logical_offset) diff --git a/src/rgw/rgw_compression.h b/src/rgw/rgw_compression.h index 4d7f8638412a0..24c2d6752ea64 100644 --- a/src/rgw/rgw_compression.h +++ b/src/rgw/rgw_compression.h @@ -10,8 +10,10 @@ #include "rgw_putobj.h" #include "rgw_op.h" #include "rgw_compression_types.h" +#include "include/tracer.h" int rgw_compression_info_from_attrset(map& attrs, bool& need_decompress, RGWCompressionInfo& cs_info); +int rgw_compression_info_from_attrset(map& attrs, bool& need_decompress, RGWCompressionInfo& cs_info, Jager_Tracer&, const Span&); class RGWGetObj_Decompress : public RGWGetObj_Filter { diff --git a/src/rgw/rgw_cr_rados.cc b/src/rgw/rgw_cr_rados.cc index b1da90104cf77..17572e5e9252f 100644 --- a/src/rgw/rgw_cr_rados.cc +++ b/src/rgw/rgw_cr_rados.cc @@ -18,6 +18,7 @@ #include "cls/rgw/cls_rgw_client.h" #include +#include #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw diff --git a/src/rgw/rgw_fcgi_process.cc b/src/rgw/rgw_fcgi_process.cc index 6bb1102526951..583d9c62d2f0b 100644 --- a/src/rgw/rgw_fcgi_process.cc +++ b/src/rgw/rgw_fcgi_process.cc @@ -5,6 +5,8 @@ #include "common/Throttle.h" #include "common/WorkQueue.h" +#include "include/tracer.h" + #include "rgw_rados.h" #include "rgw_rest.h" #include "rgw_frontend.h" @@ -14,6 +16,9 @@ #include "rgw_client_io.h" #include "rgw_client_io_filters.h" +#include +#include + #define dout_subsys ceph_subsys_rgw void RGWFCGXProcess::run() @@ -123,10 +128,22 @@ void RGWFCGXProcess::handle_request(RGWRequest* r) &fcgxfe))); RGWRestfulIO client_io(cct, &real_client_io); - - int ret = process_request(store, rest, req, uri_prefix, - *auth_registry, &client_io, olog, - null_yield, nullptr); + int ret; + #ifdef WITH_JAEGER + Jager_Tracer tracer; + std::string tracerName; + auto time=std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + tracerName = std::ctime(&time); + 
tracer.init_tracer(tracerName.c_str(),"/home/abhinav/GSOC/ceph/src/tracerConfig.yaml"); + Span span=tracer.new_span("RootSpan"); + ret = process_request(store, rest, req, uri_prefix, + *auth_registry, &client_io, olog, + null_yield, tracer,span nullptr); + #else + ret = process_request(store, rest, req, uri_prefix, + *auth_registry, &client_io, olog, + null_yield, nullptr); + #endif if (ret < 0) { /* we don't really care about return code */ dout(20) << "process_request() returned " << ret << dendl; diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc index c4358d65421f4..c38500dd668af 100644 --- a/src/rgw/rgw_http_client.cc +++ b/src/rgw/rgw_http_client.cc @@ -1232,6 +1232,16 @@ void *RGWHTTPManager::reqs_thread_entry() return 0; } +#ifdef WITH_JAEGER +void rgw_http_client_init(CephContext *cct,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_http_client.cc rgw_http_client_init",parent_span); + curl_global_init(CURL_GLOBAL_ALL); + rgw_http_manager = new RGWHTTPManager(cct); + rgw_http_manager->start(); +} +#endif + void rgw_http_client_init(CephContext *cct) { curl_global_init(CURL_GLOBAL_ALL); diff --git a/src/rgw/rgw_http_client.h b/src/rgw/rgw_http_client.h index 2ebb80dfa0757..0edeec6f2f99b 100644 --- a/src/rgw/rgw_http_client.h +++ b/src/rgw/rgw_http_client.h @@ -10,12 +10,14 @@ #include "rgw_common.h" #include "rgw_string.h" #include "rgw_http_client_types.h" +#include "include/tracer.h" #include using param_pair_t = pair; using param_vec_t = vector; +void rgw_http_client_init(CephContext *cct,Jager_Tracer&,const Span&); void rgw_http_client_init(CephContext *cct); void rgw_http_client_cleanup(); diff --git a/src/rgw/rgw_http_client_curl.h b/src/rgw/rgw_http_client_curl.h index 2a49a2c36c4da..3f8b8b64c52ee 100644 --- a/src/rgw/rgw_http_client_curl.h +++ b/src/rgw/rgw_http_client_curl.h @@ -20,10 +20,13 @@ #include #include "rgw_frontend.h" +// #include "include/tracer.h" + namespace rgw { namespace curl { 
using fe_map_t = std::multimap ; +// void setup_curl(boost::optional m,Jager_Tracer&,const Span&); void setup_curl(boost::optional m); void cleanup_curl(); } diff --git a/src/rgw/rgw_loadgen_process.cc b/src/rgw/rgw_loadgen_process.cc index 6bc9ccd477691..e26f228fe488a 100644 --- a/src/rgw/rgw_loadgen_process.cc +++ b/src/rgw/rgw_loadgen_process.cc @@ -5,6 +5,8 @@ #include "common/Throttle.h" #include "common/WorkQueue.h" +#include "include/tracer.h" + #include "rgw_rados.h" #include "rgw_rest.h" #include "rgw_frontend.h" @@ -14,6 +16,8 @@ #include "rgw_client_io.h" #include +#include +#include #define dout_subsys ceph_subsys_rgw @@ -132,10 +136,22 @@ void RGWLoadGenProcess::handle_request(RGWRequest* r) RGWLoadGenIO real_client_io(&env); RGWRestfulIO client_io(cct, &real_client_io); - - int ret = process_request(store, rest, req, uri_prefix, - *auth_registry, &client_io, olog, - null_yield, nullptr); + int ret; + #ifdef WITH_JAEGER + Jager_Tracer tracer; + std::string tracerName; + auto time=std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + tracerName = std::ctime(&time); + tracer.init_tracer(tracerName.c_str(),"/home/abhinav/GSOC/ceph/src/tracerConfig.yaml"); + Span span=tracer.new_span("RootSpan"); + ret = process_request(store, rest, req, uri_prefix, + *auth_registry, &client_io, olog, + null_yield, tracer, span, nullptr); + #else + ret = process_request(store, rest, req, uri_prefix, + *auth_registry, &client_io, olog, + null_yield, nullptr); + #endif if (ret < 0) { /* we don't really care about return code */ dout(20) << "process_request() returned " << ret << dendl; diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc index a3ca40e2cdf39..9f3c17ac7d3ec 100644 --- a/src/rgw/rgw_main.cc +++ b/src/rgw/rgw_main.cc @@ -38,6 +38,8 @@ #include "rgw_frontend.h" #include "rgw_http_client_curl.h" #include "rgw_perf_counters.h" + + #ifdef WITH_RADOSGW_AMQP_ENDPOINT #include "rgw_amqp.h" #endif @@ -55,6 +57,10 @@ #include #endif +#ifndef 
WITH_JAEGER + #define WITH_JAEGER +#endif + #define dout_subsys ceph_subsys_rgw namespace { @@ -183,7 +189,9 @@ int radosgw_Main(int argc, const char **argv) << std::endl; return ENOSYS; } - + Jager_Tracer tracer; + tracer.init_tracer("rgw_main_started","/home/abhinav/GSOC/ceph/src/tracerConfig.yaml"); + Span parent_span=tracer.new_span("rgw main started"); /* alternative default for module */ map defaults = { { "debug_rgw", "1/5" }, @@ -203,17 +211,28 @@ int radosgw_Main(int argc, const char **argv) } int flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS; + #ifdef WITH_JAEGER global_pre_init( &defaults, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, - flags); + flags,tracer,parent_span); + #else + global_pre_init( + &defaults, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, + flags); + #endif // Now that we've determined which frontend(s) to use, continue with global // initialization. Passing false as the final argument ensures that // global_pre_init() is not invoked twice. // claim the reference and release it after subsequent destructors have fired + #ifdef WITH_JAEGER + auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON,flags, tracer, parent_span, "rgw_data", false); + #else auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT, - CODE_ENVIRONMENT_DAEMON, - flags, "rgw_data", false); + CODE_ENVIRONMENT_DAEMON, + flags, "rgw_data", false); + #endif // First, let's determine which frontends are configured. 
list frontends; @@ -296,10 +315,16 @@ int radosgw_Main(int argc, const char **argv) if (!g_conf()->rgw_region.empty() && g_conf()->rgw_zonegroup.empty()) { g_conf().set_val_or_die("rgw_zonegroup", g_conf()->rgw_region.c_str()); } - + tracer.finish_tracer(); if (g_conf()->daemonize) { global_init_daemonize(g_ceph_context); } + + Jager_Tracer tracerSec; + + tracerSec.init_tracer("rgw_initiation_2","/home/abhinav/GSOC/ceph/src/tracerConfig.yaml"); + Span parent_span2=tracerSec.new_span("rgw_main.cc after global_init_daemonize"); + ceph::mutex mutex = ceph::make_mutex("main"); SafeTimer init_timer(g_ceph_context, mutex); init_timer.init(); @@ -308,9 +333,13 @@ int radosgw_Main(int argc, const char **argv) mutex.unlock(); common_init_finish(g_ceph_context); - - init_async_signal_handler(); - register_async_signal_handler(SIGHUP, sighup_handler); + #ifdef WITH_JAEGER + init_async_signal_handler(tracerSec,parent_span2); + register_async_signal_handler(SIGHUP, sighup_handler,tracerSec,parent_span2); + #else + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + #endif TracepointProvider::initialize(g_ceph_context); TracepointProvider::initialize(g_ceph_context); @@ -320,11 +349,17 @@ int radosgw_Main(int argc, const char **argv) derr << "ERROR: unable to initialize rgw tools" << dendl; return -r; } + #ifdef WITH_JAEGER + rgw_init_resolver(); + rgw::curl::setup_curl(fe_map); + rgw_http_client_init(g_ceph_context,tracerSec,parent_span2); + #else + rgw_init_resolver(); + rgw::curl::setup_curl(fe_map); + rgw_http_client_init(g_ceph_context); + #endif + tracerSec.finish_tracer(); - rgw_init_resolver(); - rgw::curl::setup_curl(fe_map); - rgw_http_client_init(g_ceph_context); - #if defined(WITH_RADOSGW_FCGI_FRONTEND) FCGX_Init(); #endif @@ -448,7 +483,7 @@ int radosgw_Main(int argc, const char **argv) admin_resource->register_resource("usage", new RGWRESTMgr_Usage); admin_resource->register_resource("user", new RGWRESTMgr_User); 
admin_resource->register_resource("bucket", new RGWRESTMgr_Bucket); - + /*Registering resource for /admin/metadata */ admin_resource->register_resource("metadata", new RGWRESTMgr_Metadata); admin_resource->register_resource("log", new RGWRESTMgr_Log); @@ -531,6 +566,7 @@ int radosgw_Main(int argc, const char **argv) } RGWFrontend *fe = NULL; + // framework="civetweb"; if (framework == "civetweb" || framework == "mongoose") { framework = "civetweb"; @@ -683,4 +719,3 @@ int radosgw_main(int argc, const char** argv) } } /* extern "C" */ - diff --git a/src/rgw/rgw_multi.cc b/src/rgw/rgw_multi.cc index 2e4858c150069..c54c3111360d9 100644 --- a/src/rgw/rgw_multi.cc +++ b/src/rgw/rgw_multi.cc @@ -308,6 +308,16 @@ int list_bucket_multiparts(rgw::sal::RGWRadosStore *store, RGWBucketInfo& bucket int abort_bucket_multiparts(rgw::sal::RGWRadosStore *store, CephContext *cct, RGWBucketInfo& bucket_info, string& prefix, string& delim) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_multi.cc abort_bucket_multiparts", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_multi.cc abort_bucket_multiparts"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif constexpr int max = 1000; int ret, num_deleted = 0; vector objs; @@ -364,3 +374,10 @@ int abort_bucket_multiparts(rgw::sal::RGWRadosStore *store, CephContext *cct, RG return 0; } + +int abort_bucket_multiparts(rgw::sal::RGWRadosStore *store, CephContext *cct, RGWBucketInfo& bucket_info, + string& prefix, string& delim, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_multi.cc abort_bucket_multiparts", parent_span); + return abort_bucket_multiparts(store, cct, bucket_info, prefix, delim); +} diff --git a/src/rgw/rgw_multi.h b/src/rgw/rgw_multi.h index 5f8fa11b336a5..eb32527afc1a1 100644 --- a/src/rgw/rgw_multi.h +++ b/src/rgw/rgw_multi.h @@ -8,6 
+8,7 @@ #include "rgw_xml.h" #include "rgw_obj_manifest.h" #include "rgw_compression_types.h" +#include "include/tracer.h" namespace rgw { namespace sal { class RGWRadosStore; @@ -136,4 +137,7 @@ extern int list_bucket_multiparts(rgw::sal::RGWRadosStore *store, RGWBucketInfo& extern int abort_bucket_multiparts(rgw::sal::RGWRadosStore *store, CephContext *cct, RGWBucketInfo& bucket_info, string& prefix, string& delim); + +extern int abort_bucket_multiparts(rgw::sal::RGWRadosStore *store, CephContext *cct, RGWBucketInfo& bucket_info, + string& prefix, string& delim, Jager_Tracer&, const Span&); #endif diff --git a/src/rgw/rgw_obj_manifest.cc b/src/rgw/rgw_obj_manifest.cc index a91c51c4658ea..9fb2fc4812715 100644 --- a/src/rgw/rgw_obj_manifest.cc +++ b/src/rgw/rgw_obj_manifest.cc @@ -362,3 +362,12 @@ int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m return 0; } +int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m, + const rgw_placement_rule& head_placement_rule, + const rgw_placement_rule *tail_placement_rule, + const rgw_bucket& _b, const rgw_obj& _obj, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_obj_manifest.cc RGWObjManifest::generator::create_begin", parent_span); + return RGWObjManifest::generator::create_begin(cct, _m, head_placement_rule, tail_placement_rule, _b, _obj); +} + diff --git a/src/rgw/rgw_obj_manifest.h b/src/rgw/rgw_obj_manifest.h index c2b6157ed0600..b621a3027bc80 100644 --- a/src/rgw/rgw_obj_manifest.h +++ b/src/rgw/rgw_obj_manifest.h @@ -539,6 +539,11 @@ class RGWObjManifest { const rgw_placement_rule *tail_placement_rule, const rgw_bucket& bucket, const rgw_obj& obj); + int create_begin(CephContext *cct, RGWObjManifest *manifest, + const rgw_placement_rule& head_placement_rule, + const rgw_placement_rule *tail_placement_rule, + const rgw_bucket& bucket, + const rgw_obj& obj, Jager_Tracer&, const Span&); int create_next(uint64_t ofs); diff --git 
a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index cb8e8a90d5333..0f20e0ef23bd9 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -234,6 +234,16 @@ int rgw_op_get_bucket_policy_from_attr(CephContext *cct, return 0; } +int rgw_op_get_bucket_policy_from_attr(CephContext *cct, + rgw::sal::RGWRadosStore *store, + RGWBucketInfo& bucket_info, + map& bucket_attrs, + RGWAccessControlPolicy *policy, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc rgw_op_get_bucket_policy_from_attr", parent_span); + return rgw_op_get_bucket_policy_from_attr(cct, store, bucket_info, bucket_attrs, policy); +} + static int get_obj_policy_from_attr(CephContext *cct, rgw::sal::RGWRadosStore *store, RGWObjectCtx& obj_ctx, @@ -279,6 +289,52 @@ static int get_obj_policy_from_attr(CephContext *cct, return ret; } +static int get_obj_policy_from_attr(CephContext *cct, + rgw::sal::RGWRadosStore *store, + RGWObjectCtx& obj_ctx, + RGWBucketInfo& bucket_info, + map& bucket_attrs, + RGWAccessControlPolicy *policy, + string *storage_class, + rgw_obj& obj, + optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc get_obj_policy_from_attr", parent_span); + bufferlist bl; + int ret = 0; + + RGWRados::Object op_target(store->getRados(), bucket_info, obj_ctx, obj); + RGWRados::Object::Read rop(&op_target); + + ret = rop.get_attr(RGW_ATTR_ACL, bl, y, tracer, span); + if (ret >= 0) { + ret = decode_policy(cct, bl, policy); + if (ret < 0) + return ret; + } else if (ret == -ENODATA) { + /* object exists, but policy is broken */ + ldout(cct, 0) << "WARNING: couldn't find acl header for object, generating default" << dendl; + rgw::sal::RGWRadosUser user(store); + ret = user.get_by_id(bucket_info.owner, y); + if (ret < 0) + return ret; + + policy->create_default(bucket_info.owner, user.get_display_name()); + } + + if (storage_class) { + bufferlist scbl; + int r = rop.get_attr(RGW_ATTR_STORAGE_CLASS, scbl, y); + if (r 
>= 0) { + *storage_class = scbl.to_str(); + } else { + storage_class->clear(); + } + } + + return ret; +} + static boost::optional get_iam_policy_from_attr(CephContext* cct, rgw::sal::RGWRadosStore* store, @@ -338,6 +394,18 @@ static int get_obj_attrs(rgw::sal::RGWRadosStore *store, struct req_state *s, co return read_op.prepare(s->yield); } +static int get_obj_attrs(rgw::sal::RGWRadosStore *store, struct req_state *s, const rgw_obj& obj, map& attrs, Jager_Tracer& tracer,const Span& parent_span, rgw_obj *target_obj = nullptr) +{ + Span span = tracer.child_span("rgw_op.cc get_obj_attrs", parent_span); + RGWRados::Object op_target(store->getRados(), s->bucket_info, *static_cast(s->obj_ctx), obj); + RGWRados::Object::Read read_op(&op_target); + + read_op.params.attrs = &attrs; + read_op.params.target_obj = target_obj; + + return read_op.prepare(s->yield, tracer, span); +} + static int get_obj_head(rgw::sal::RGWRadosStore *store, struct req_state *s, const rgw_obj& obj, map *attrs, @@ -489,6 +557,32 @@ static int read_bucket_policy(rgw::sal::RGWRadosStore *store, return ret; } +static int read_bucket_policy(rgw::sal::RGWRadosStore *store, + struct req_state *s, + RGWBucketInfo& bucket_info, + map& bucket_attrs, + RGWAccessControlPolicy *policy, + rgw_bucket& bucket, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc read_bucket_policy", parent_span); + if (!s->system_request && bucket_info.flags & BUCKET_SUSPENDED) { + ldpp_dout(s, 0) << "NOTICE: bucket " << bucket_info.bucket.name + << " is suspended" << dendl; + return -ERR_USER_SUSPENDED; + } + + if (bucket.name.empty()) { + return 0; + } + + int ret = rgw_op_get_bucket_policy_from_attr(s->cct, store, bucket_info, bucket_attrs, policy, tracer, span); + if (ret == -ENOENT) { + ret = -ERR_NO_SUCH_BUCKET; + } + + return ret; +} + static int read_obj_policy(rgw::sal::RGWRadosStore *store, struct req_state *s, RGWBucketInfo& bucket_info, @@ -553,12 +647,292 @@ static int 
read_obj_policy(rgw::sal::RGWRadosStore *store, return ret; } +static int read_obj_policy(rgw::sal::RGWRadosStore *store, + struct req_state *s, + RGWBucketInfo& bucket_info, + map& bucket_attrs, + RGWAccessControlPolicy* acl, + string *storage_class, + boost::optional& policy, + rgw_bucket& bucket, + rgw_obj_key& object, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc read_obj_policy", parent_span); + string upload_id; + upload_id = s->info.args.get("uploadId"); + rgw_obj obj; + + if (!s->system_request && bucket_info.flags & BUCKET_SUSPENDED) { + ldpp_dout(s, 0) << "NOTICE: bucket " << bucket_info.bucket.name + << " is suspended" << dendl; + return -ERR_USER_SUSPENDED; + } + + if (!upload_id.empty()) { + /* multipart upload */ + RGWMPObj mp(object.name, upload_id); + string oid = mp.get_meta(); + obj.init_ns(bucket, oid, mp_ns); + obj.set_in_extra_data(true); + } else { + obj = rgw_obj(bucket, object); + } + policy = get_iam_policy_from_attr(s->cct, store, bucket_attrs, bucket.tenant); + + RGWObjectCtx *obj_ctx = static_cast(s->obj_ctx); + int ret = get_obj_policy_from_attr(s->cct, store, *obj_ctx, + bucket_info, bucket_attrs, acl, storage_class, obj, s->yield, tracer, span); + if (ret == -ENOENT) { + /* object does not exist checking the bucket's ACL to make sure + that we send a proper error code */ + RGWAccessControlPolicy bucket_policy(s->cct); + ret = rgw_op_get_bucket_policy_from_attr(s->cct, store, bucket_info, bucket_attrs, &bucket_policy); + if (ret < 0) { + return ret; + } + const rgw_user& bucket_owner = bucket_policy.get_owner().get_id(); + if (bucket_owner.compare(s->user->get_id()) != 0 && + ! s->auth.identity->is_admin_of(bucket_owner)) { + if (policy) { + auto r = policy->eval(s->env, *s->auth.identity, rgw::IAM::s3ListBucket, ARN(bucket)); + if (r == Effect::Allow) + return -ENOENT; + if (r == Effect::Deny) + return -EACCES; + } + if (! 
bucket_policy.verify_permission(s, *s->auth.identity, s->perm_mask, RGW_PERM_READ)) + ret = -EACCES; + else + ret = -ENOENT; + } else { + ret = -ENOENT; + } + } + + return ret; +} + /** * Get the AccessControlPolicy for an user, bucket or object off of disk. * s: The req_state to draw information from. * only_bucket: If true, reads the user and bucket ACLs rather than the object ACL. * Returns: 0 on success, -ERR# otherwise. */ + + int rgw_build_bucket_policies(rgw::sal::RGWRadosStore* store, struct req_state* s,Jager_Tracer& tracer,const Span& parent_span) + { + Span span=tracer.child_span("rgw_op.cc rgw_build_bucket_policies()",parent_span); + int ret = 0; + rgw_obj_key obj; + auto obj_ctx = store->svc()->sysobj->init_obj_ctx(); + + string bi = s->info.args.get(RGW_SYS_PARAM_PREFIX "bucket-instance"); + if (!bi.empty()) { + string bucket_name; + ret = rgw_bucket_parse_bucket_instance(bi, &bucket_name, &s->bucket_instance_id, &s->bucket_instance_shard_id); + if (ret < 0) { + return ret; + } + } + + if(s->dialect.compare("s3") == 0) { + s->bucket_acl = std::make_unique(s->cct); + } else if(s->dialect.compare("swift") == 0) { + /* We aren't allocating the account policy for those operations using + * the Swift's infrastructure that don't really need req_state::user. + * Typical example here is the implementation of /info. 
*/ + if (!s->user->get_id().empty()) { + s->user_acl = std::make_unique(s->cct); + } + s->bucket_acl = std::make_unique(s->cct); + } else { + s->bucket_acl = std::make_unique(s->cct); + } + + /* check if copy source is within the current domain */ + if (!s->src_bucket_name.empty()) { + RGWBucketInfo source_info; + + if (s->bucket_instance_id.empty()) { + ret = store->getRados()->get_bucket_info(store->svc(), s->src_tenant_name, s->src_bucket_name, source_info, NULL, s->yield); + } else { + ret = store->getRados()->get_bucket_instance_info(obj_ctx, s->bucket_instance_id, source_info, NULL, NULL, s->yield); + } + if (ret == 0) { + string& zonegroup = source_info.zonegroup; + s->local_source = store->svc()->zone->get_zonegroup().equals(zonegroup); + } + } + + struct { + rgw_user uid; + std::string display_name; + } acct_acl_user = { + s->user->get_id(), + s->user->get_display_name(), + }; + + if (!s->bucket_name.empty()) { + s->bucket_exists = true; + + auto b = rgw_bucket(rgw_bucket_key(s->bucket_tenant, s->bucket_name, s->bucket_instance_id)); + + RGWObjVersionTracker ep_ot; + ret = store->ctl()->bucket->read_bucket_info(b, &s->bucket_info, + s->yield, + RGWBucketCtl::BucketInstance::GetParams() + .set_mtime(&s->bucket_mtime) + .set_attrs(&s->bucket_attrs), + &ep_ot); + if (ret < 0) { + if (ret != -ENOENT) { + string bucket_log; + bucket_log = rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name); + ldpp_dout(s, 0) << "NOTICE: couldn't get bucket from bucket_name (name=" + << bucket_log << ")" << dendl; + return ret; + } + s->bucket_exists = false; + } + s->bucket_ep_objv = ep_ot.read_version; + s->bucket = s->bucket_info.bucket; + + if (s->bucket_exists) { + ret = read_bucket_policy(store, s, s->bucket_info, s->bucket_attrs, + s->bucket_acl.get(), s->bucket, tracer, span); + acct_acl_user = { + s->bucket_info.owner, + s->bucket_acl->get_owner().get_display_name(), + }; + } else { + return -ERR_NO_SUCH_BUCKET; + } + + s->bucket_owner = 
s->bucket_acl->get_owner(); + + RGWZoneGroup zonegroup; + int r = store->svc()->zone->get_zonegroup(s->bucket_info.zonegroup, zonegroup); + if (!r) { + if (!zonegroup.endpoints.empty()) { + s->zonegroup_endpoint = zonegroup.endpoints.front(); + } else { + // use zonegroup's master zone endpoints + auto z = zonegroup.zones.find(zonegroup.master_zone); + if (z != zonegroup.zones.end() && !z->second.endpoints.empty()) { + s->zonegroup_endpoint = z->second.endpoints.front(); + } + } + s->zonegroup_name = zonegroup.get_name(); + } + if (r < 0 && ret == 0) { + ret = r; + } + + if (s->bucket_exists && !store->svc()->zone->get_zonegroup().equals(s->bucket_info.zonegroup)) { + ldpp_dout(s, 0) << "NOTICE: request for data in a different zonegroup (" + << s->bucket_info.zonegroup << " != " + << store->svc()->zone->get_zonegroup().get_id() << ")" << dendl; + /* we now need to make sure that the operation actually requires copy source, that is + * it's a copy operation + */ + if (store->svc()->zone->get_zonegroup().is_master_zonegroup() && s->system_request) { + /*If this is the master, don't redirect*/ + } else if (s->op_type == RGW_OP_GET_BUCKET_LOCATION ) { + /* If op is get bucket location, don't redirect */ + } else if (!s->local_source || + (s->op != OP_PUT && s->op != OP_COPY) || + s->object.empty()) { + return -ERR_PERMANENT_REDIRECT; + } + } + + /* init dest placement -- only if bucket exists, otherwise request is either not relevant, or + * it's a create_bucket request, in which case the op will deal with the placement later */ + if (s->bucket_exists) { + s->dest_placement.storage_class = s->info.storage_class; + s->dest_placement.inherit_from(s->bucket_info.placement_rule); + + if (!store->svc()->zone->get_zone_params().valid_placement(s->dest_placement)) { + ldpp_dout(s, 0) << "NOTICE: invalid dest placement: " << s->dest_placement.to_str() << dendl; + return -EINVAL; + } + } + + if(s->bucket_exists) { + s->bucket_access_conf = 
get_public_access_conf_from_attr(s->bucket_attrs); + } + } + + /* handle user ACL only for those APIs which support it */ + if (s->user_acl) { + map uattrs; + ret = store->ctl()->user->get_attrs_by_uid(acct_acl_user.uid, &uattrs, s->yield); + if (!ret) { + ret = get_user_policy_from_attr(s->cct, store, uattrs, *s->user_acl); + } + if (-ENOENT == ret) { + /* In already existing clusters users won't have ACL. In such case + * assuming that only account owner has the rights seems to be + * reasonable. That allows to have only one verification logic. + * NOTE: there is small compatibility kludge for global, empty tenant: + * 1. if we try to reach an existing bucket, its owner is considered + * as account owner. + * 2. otherwise account owner is identity stored in s->user->user_id. */ + s->user_acl->create_default(acct_acl_user.uid, + acct_acl_user.display_name); + ret = 0; + } else if (ret < 0) { + ldpp_dout(s, 0) << "NOTICE: couldn't get user attrs for handling ACL " + "(user_id=" << s->user->get_id() << ", ret=" << ret << ")" << dendl; + return ret; + } + } + // We don't need user policies in case of STS token returned by AssumeRole, + // hence the check for user type + if (! s->user->get_id().empty() && s->auth.identity->get_identity_type() != TYPE_ROLE) { + try { + map uattrs; + if (ret = store->ctl()->user->get_attrs_by_uid(s->user->get_id(), &uattrs, s->yield); ! ret) { + if (s->iam_user_policies.empty()) { + s->iam_user_policies = get_iam_user_policy_from_attr(s->cct, store, uattrs, s->user->get_tenant()); + } else { + // This scenario can happen when a STS token has a policy, then we need to append other user policies + // to the existing ones. (e.g. 
token returned by GetSessionToken) + auto user_policies = get_iam_user_policy_from_attr(s->cct, store, uattrs, s->user->get_tenant()); + s->iam_user_policies.insert(s->iam_user_policies.end(), user_policies.begin(), user_policies.end()); + } + } else { + if (ret == -ENOENT) + ret = 0; + else ret = -EACCES; + } + } catch (const std::exception& e) { + lderr(s->cct) << "Error reading IAM User Policy: " << e.what() << dendl; + ret = -EACCES; + } + } + + try { + s->iam_policy = get_iam_policy_from_attr(s->cct, store, s->bucket_attrs, + s->bucket_tenant); + } catch (const std::exception& e) { + // Really this is a can't happen condition. We parse the policy + // when it's given to us, so perhaps we should abort or otherwise + // raise bloody murder. + ldpp_dout(s, 0) << "Error reading IAM Policy: " << e.what() << dendl; + ret = -EACCES; + } + + bool success = store->svc()->zone->get_redirect_zone_endpoint(&s->redirect_zone_endpoint); + if (success) { + ldpp_dout(s, 20) << "redirect_zone_endpoint=" << s->redirect_zone_endpoint << dendl; + } + + return ret; + } + + + int rgw_build_bucket_policies(rgw::sal::RGWRadosStore* store, struct req_state* s) { int ret = 0; @@ -566,6 +940,7 @@ int rgw_build_bucket_policies(rgw::sal::RGWRadosStore* store, struct req_state* auto obj_ctx = store->svc()->sysobj->init_obj_ctx(); string bi = s->info.args.get(RGW_SYS_PARAM_PREFIX "bucket-instance"); + std::cout<bucket_instance_id, &s->bucket_instance_shard_id); @@ -787,7 +1162,7 @@ int rgw_build_object_policies(rgw::sal::RGWRadosStore *store, struct req_state * } s->object_acl = std::make_unique(s->cct); rgw_obj obj(s->bucket, s->object); - + store->getRados()->set_atomic(s->obj_ctx, obj); if (prefetch_data) { store->getRados()->set_prefetch_data(s->obj_ctx, obj); @@ -800,6 +1175,31 @@ int rgw_build_object_policies(rgw::sal::RGWRadosStore *store, struct req_state * return ret; } +int rgw_build_object_policies(rgw::sal::RGWRadosStore *store, struct req_state *s, + bool prefetch_data, 
Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc rgw_build_object_policies", parent_span); + int ret = 0; + + if (!s->object.empty()) { + if (!s->bucket_exists) { + return -ERR_NO_SUCH_BUCKET; + } + s->object_acl = std::make_unique(s->cct); + rgw_obj obj(s->bucket, s->object); + + store->getRados()->set_atomic(s->obj_ctx, obj); + if (prefetch_data) { + store->getRados()->set_prefetch_data(s->obj_ctx, obj); + } + ret = read_obj_policy(store, s, s->bucket_info, s->bucket_attrs, + s->object_acl.get(), nullptr, s->iam_policy, s->bucket, + s->object, tracer, span); + } + + return ret; +} + void rgw_add_to_iam_environment(rgw::IAM::Environment& e, std::string_view key, std::string_view val){ // This variant just adds non empty key pairs to IAM env., values can be empty // in certain cases like tagging @@ -919,12 +1319,31 @@ void rgw_build_iam_environment(rgw::sal::RGWRadosStore* store, void rgw_bucket_object_pre_exec(struct req_state *s) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc rgw_bucket_object_pre_exec", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_op.cc rgw_bucket_object_pre_exec"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (s->expect_cont) dump_continue(s); dump_bucket_from_state(s); } +void rgw_bucket_object_pre_exec(struct req_state *s, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc rgw_bucket_object_pre_exec", parent_span); + if (s->expect_cont) + dump_continue(s); + + /* pass this function's own span so the nested call is recorded under it, not beside it */ dump_bucket_from_state(s, tracer, span); +} + // So! Now and then when we try to update bucket information, the // bucket has changed during the course of the operation.
(Or we have // a cache consistency problem that Watch/Notify isn't ruling out @@ -998,6 +1417,49 @@ int RGWGetObj::verify_permission() return 0; } +int RGWGetObj::verify_permission(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWGetObj::verify_permission",parent_span); + obj = rgw_obj(s->bucket, s->object); + store->getRados()->set_atomic(s->obj_ctx, obj); + if (get_data) { + store->getRados()->set_prefetch_data(s->obj_ctx, obj); + } + + if (torrent.get_flag()) { + if (obj.key.instance.empty()) { + action = rgw::IAM::s3GetObjectTorrent; + } else { + action = rgw::IAM::s3GetObjectVersionTorrent; + } + } else { + if (obj.key.instance.empty()) { + action = rgw::IAM::s3GetObject; + } else { + action = rgw::IAM::s3GetObjectVersion; + } + if (s->iam_policy && s->iam_policy->has_partial_conditional(S3_EXISTING_OBJTAG)) + rgw_iam_add_existing_objtags(store, s, obj, action); + if (! s->iam_user_policies.empty()) { + for (auto& user_policy : s->iam_user_policies) { + if (user_policy.has_partial_conditional(S3_EXISTING_OBJTAG)) + rgw_iam_add_existing_objtags(store, s, obj, action); + } + } + } + + if (!verify_object_permission(this, s, action, tracer, span)) { + return -EACCES; + } + + if (s->bucket_info.obj_lock_enabled()) { + get_retention = verify_object_permission(this, s, rgw::IAM::s3GetObjectRetention); + get_legal_hold = verify_object_permission(this, s, rgw::IAM::s3GetObjectLegalHold); + } + + return 0; +} + // cache the objects tags into the requests // use inside try/catch as "decode()" may throw void populate_tags_in_request(req_state* s, const std::map& attrs) { @@ -1019,8 +1481,40 @@ void populate_metadata_in_request(req_state* s, std::mapstack_span.empty()) + span = tracer_2.child_span("rgw_op.h RGWOp::verify_op_mask", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_op.h RGWOp::verify_op_mask"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif uint32_t required_mask = op_mask(); 
ldpp_dout(this, 20) << "required_mask= " << required_mask @@ -1197,7 +1691,7 @@ void RGWGetBucketTags::pre_exec() rgw_bucket_object_pre_exec(s); } -void RGWGetBucketTags::execute() +void RGWGetBucketTags::execute() { auto iter = s->bucket_attrs.find(RGW_ATTR_TAGS); if (iter != s->bucket_attrs.end()) { @@ -1216,7 +1710,7 @@ int RGWPutBucketTags::verify_permission() { void RGWPutBucketTags::execute() { op_ret = get_params(); - if (op_ret < 0) + if (op_ret < 0) return; if (!store->svc()->zone->is_meta_master()) { @@ -1294,7 +1788,7 @@ int RGWPutBucketReplication::verify_permission() { void RGWPutBucketReplication::execute() { op_ret = get_params(); - if (op_ret < 0) + if (op_ret < 0) return; if (!store->svc()->zone->is_meta_master()) { @@ -1387,8 +1881,9 @@ int RGWOp::do_aws4_auth_completion() return 0; } -int RGWOp::init_quota() +int RGWOp::init_quota(Jager_Tracer& tracer,const Span& parent_span) { + Span span=tracer.child_span("rgw_op.cc init_quota()",parent_span); /* no quota enforcement for system requests */ if (s->system_request) return 0; @@ -1432,7 +1927,63 @@ int RGWOp::init_quota() return 0; } -static bool validate_cors_rule_method(RGWCORSRule *rule, const char *req_meth) { + +int RGWOp::init_quota() +{ + span_structure ss; + #ifdef WITH_JAEGER + /* span is labelled with this function's own name, same label as the traced overload */ Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc init_quota()", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_op.cc init_quota()"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + /* no quota enforcement for system requests */ + if (s->system_request) + return 0; + + /* init quota related stuff */ + if (!(s->user->get_info().op_mask & RGW_OP_TYPE_MODIFY)) { + return 0; + } + + /* only interested in object related ops */ + if (s->object.empty()) { + return 0; + } + + rgw::sal::RGWRadosUser owner_user(store); + rgw::sal::RGWUser *user; + + if (s->user->get_id() == s->bucket_owner.get_id()) { + user = s->user; + }
else { + int r = owner_user.get_by_id(s->bucket_info.owner, s->yield); + if (r < 0) + return r; + user = &owner_user; + } + + if (s->bucket_info.quota.enabled) { + bucket_quota = s->bucket_info.quota; + } else if (user->get_info().bucket_quota.enabled) { + bucket_quota = user->get_info().bucket_quota; + } else { + bucket_quota = store->svc()->quota->get_bucket_quota(); + } + + if (user->get_info().user_quota.enabled) { + user_quota = user->get_info().user_quota; + } else { + user_quota = store->svc()->quota->get_user_quota(); + } + + return 0; +} + +static bool validate_cors_rule_method(RGWCORSRule *rule, const char *req_meth) { uint8_t flags = 0; if (!req_meth) { @@ -2140,6 +2691,21 @@ int RGWGetObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len) return send_response_data(bl, bl_ofs, bl_len); } +int RGWGetObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len, const Span& parent_span) +{ + /* garbage collection related handling */ + utime_t start_time = ceph_clock_now(); + if (start_time > gc_invalidate_time) { + int r = store->getRados()->defer_gc(s->obj_ctx, s->bucket_info, obj, s->yield); + if (r < 0) { + ldpp_dout(this, 0) << "WARNING: could not defer gc entry for obj" << dendl; + } + gc_invalidate_time = start_time; + gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2); + } + return send_response_data(bl, bl_ofs, bl_len, parent_span); +} + bool RGWGetObj::prefetch_data() { /* HEAD request, stop prefetch*/ @@ -2170,6 +2736,12 @@ void RGWGetObj::pre_exec() rgw_bucket_object_pre_exec(s); } +void RGWGetObj::pre_exec(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWGetObj::pre_exec",parent_span); + rgw_bucket_object_pre_exec(s, tracer, span); +} + static bool object_is_expired(map& attrs) { map::iterator iter = attrs.find(RGW_ATTR_DELETE_AT); if (iter != attrs.end()) { @@ -2205,6 +2777,14 @@ static inline void rgw_cond_decode_objtags( } } +static inline void rgw_cond_decode_objtags( + struct 
req_state *s, + const std::map &attrs, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc rgw_cond_decode_objtags", parent_span); + rgw_cond_decode_objtags(s, attrs); +} + void RGWGetObj::execute() { bufferlist bl; @@ -2380,6 +2960,182 @@ void RGWGetObj::execute() send_response_data_error(); } +void RGWGetObj::execute(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWGetObj::execute",parent_span); + bufferlist bl; + gc_invalidate_time = ceph_clock_now(); + gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2); + + bool need_decompress; + int64_t ofs_x, end_x; + + RGWGetObj_CB cb(this); + RGWGetObj_Filter* filter = (RGWGetObj_Filter *)&cb; + boost::optional decompress; + std::unique_ptr decrypt; + map::iterator attr_iter; + + perfcounter->inc(l_rgw_get); + + RGWRados::Object op_target(store->getRados(), s->bucket_info, *static_cast(s->obj_ctx), obj); + RGWRados::Object::Read read_op(&op_target); + + op_ret = get_params(); + if (op_ret < 0) + goto done_err; + + op_ret = init_common(tracer,span); + if (op_ret < 0) + goto done_err; + + read_op.conds.mod_ptr = mod_ptr; + read_op.conds.unmod_ptr = unmod_ptr; + read_op.conds.high_precision_time = s->system_request; /* system request need to use high precision time */ + read_op.conds.mod_zone_id = mod_zone_id; + read_op.conds.mod_pg_ver = mod_pg_ver; + read_op.conds.if_match = if_match; + read_op.conds.if_nomatch = if_nomatch; + read_op.params.attrs = &attrs; + read_op.params.lastmod = &lastmod; + read_op.params.obj_size = &s->obj_size; + + op_ret = read_op.prepare(s->yield, tracer, span); + if (op_ret < 0) + goto done_err; + version_id = read_op.state.obj.key.instance; + + /* STAT ops don't need data, and do no i/o */ + if (get_type() == RGW_OP_STAT_OBJ) { + return; + } + + /* start gettorrent */ + if (torrent.get_flag()) + { + attr_iter = attrs.find(RGW_ATTR_CRYPT_MODE); + if (attr_iter != attrs.end() && 
attr_iter->second.to_str() == "SSE-C-AES256") { + ldpp_dout(this, 0) << "ERROR: torrents are not supported for objects " + "encrypted with SSE-C" << dendl; + op_ret = -EINVAL; + goto done_err; + } + torrent.init(s, store); + op_ret = torrent.get_torrent_file(read_op, total_len, bl, obj); + if (op_ret < 0) + { + ldpp_dout(this, 0) << "ERROR: failed to get_torrent_file ret= " << op_ret + << dendl; + goto done_err; + } + op_ret = send_response_data(bl, 0, total_len); + if (op_ret < 0) + { + ldpp_dout(this, 0) << "ERROR: failed to send_response_data ret= " << op_ret << dendl; + goto done_err; + } + return; + } + /* end gettorrent */ + + op_ret = rgw_compression_info_from_attrset(attrs, need_decompress, cs_info, tracer, span); + if (op_ret < 0) { + ldpp_dout(s, 0) << "ERROR: failed to decode compression info, cannot decompress" << dendl; + goto done_err; + } + if (need_decompress) { + s->obj_size = cs_info.orig_size; + decompress.emplace(s->cct, &cs_info, partial_content, filter); + filter = &*decompress; + } + + attr_iter = attrs.find(RGW_ATTR_USER_MANIFEST); + if (attr_iter != attrs.end() && !skip_manifest) { + op_ret = handle_user_manifest(attr_iter->second.c_str()); + if (op_ret < 0) { + ldpp_dout(this, 0) << "ERROR: failed to handle user manifest ret=" + << op_ret << dendl; + goto done_err; + } + return; + } + + attr_iter = attrs.find(RGW_ATTR_SLO_MANIFEST); + if (attr_iter != attrs.end() && !skip_manifest) { + is_slo = true; + op_ret = handle_slo_manifest(attr_iter->second); + if (op_ret < 0) { + ldpp_dout(this, 0) << "ERROR: failed to handle slo manifest ret=" << op_ret + << dendl; + goto done_err; + } + return; + } + + // for range requests with obj size 0 + if (range_str && !(s->obj_size)) { + total_len = 0; + op_ret = -ERANGE; + goto done_err; + } + + op_ret = read_op.range_to_ofs(s->obj_size, ofs, end, tracer, span); + if (op_ret < 0) + goto done_err; + total_len = (ofs <= end ? end + 1 - ofs : 0); + + /* Check whether the object has expired. 
Swift API documentation + * stands that we should return 404 Not Found in such case. */ + if (need_object_expiration() && object_is_expired(attrs)) { + op_ret = -ENOENT; + goto done_err; + } + + /* Decode S3 objtags, if any */ + rgw_cond_decode_objtags(s, attrs, tracer, span); + + start = ofs; + + attr_iter = attrs.find(RGW_ATTR_MANIFEST); + op_ret = this->get_decrypt_filter(&decrypt, filter, + attr_iter != attrs.end() ? &(attr_iter->second) : nullptr); + if (decrypt != nullptr) { + filter = decrypt.get(); + } + if (op_ret < 0) { + goto done_err; + } + + if (!get_data || ofs > end) { + send_response_data(bl, 0, 0, parent_span); + return; + } + + perfcounter->inc(l_rgw_get_b, end - ofs); + + ofs_x = ofs; + end_x = end; + filter->fixup_range(ofs_x, end_x); + op_ret = read_op.iterate(ofs_x, end_x, filter, s->yield, tracer, span); + + if (op_ret >= 0) + op_ret = filter->flush(); + + perfcounter->tinc(l_rgw_get_lat, s->time_elapsed()); + if (op_ret < 0) { + goto done_err; + } + + op_ret = send_response_data(bl, 0, 0, parent_span); + if (op_ret < 0) { + goto done_err; + } + return; + +done_err: + send_response_data_error(parent_span); +} + int RGWGetObj::init_common() { if (range_str) { @@ -2405,13 +3161,62 @@ int RGWGetObj::init_common() return 0; } -int RGWListBuckets::verify_permission() +int RGWGetObj::init_common(Jager_Tracer& tracer, const Span& parent_span) { - rgw::Partition partition = rgw::Partition::aws; - rgw::Service service = rgw::Service::s3; + Span span = tracer.child_span("rgw_op.cc RGWGetObj::init_common",parent_span); + if (range_str) { + /* range parsed error when prefetch */ + if (!range_parsed) { + int r = parse_range(); + if (r < 0) + return r; + } + } + if (if_mod) { + if (parse_time(if_mod, &mod_time) < 0) + return -EINVAL; + mod_ptr = &mod_time; + } - if (!verify_user_permission(this, s, ARN(partition, service, "", s->user->get_tenant(), "*"), rgw::IAM::s3ListAllMyBuckets)) { - return -EACCES; + if (if_unmod) { + if (parse_time(if_unmod, 
&unmod_time) < 0) + return -EINVAL; + unmod_ptr = &unmod_time; + } + + return 0; +} + +int RGWListBuckets::verify_permission() +{ + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && global_state->stack_span.empty()) + span = tracer_2.new_span("rgw_op.cc RGWListBuckets::verify_permission()"); + else + span = tracer_2.child_span("rgw_op.cc RGWListBuckets::verify_permission()", global_state->stack_span.top()); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + rgw::Partition partition = rgw::Partition::aws; + rgw::Service service = rgw::Service::s3; + + if (!verify_user_permission(this, s, ARN(partition, service, "", s->user->get_tenant(), "*"), rgw::IAM::s3ListAllMyBuckets)) { + return -EACCES; + } + + return 0; +} + +int RGWListBuckets::verify_permission(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWListBuckets::verify_permission", parent_span); + rgw::Partition partition = rgw::Partition::aws; + rgw::Service service = rgw::Service::s3; + + if (!verify_user_permission(this, s, ARN(partition, service, "", s->user->get_tenant(), "*"), rgw::IAM::s3ListAllMyBuckets)) { + return -EACCES; } return 0; @@ -2426,8 +3231,114 @@ int RGWGetUsage::verify_permission() return 0; } +void RGWListBuckets::execute(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWListBuckets::execute()",parent_span); + Span response_span = nullptr; + bool done; + bool started = false; + uint64_t total_count = 0; + + const uint64_t max_buckets = s->cct->_conf->rgw_list_buckets_max_chunk; + + op_ret = get_params(tracer, span); + if (op_ret < 0) { + goto send_end; + } + + if (supports_account_metadata()) { + op_ret = store->ctl()->user->get_attrs_by_uid(s->user->get_id(), &attrs, s->yield); + if (op_ret < 0) { + goto send_end; + } + } + + is_truncated = false; + do { + rgw::sal::RGWBucketList buckets; + uint64_t read_count; + if (limit >= 0) { + read_count = min(limit - 
total_count, max_buckets); + } else { + read_count = max_buckets; + } + + rgw::sal::RGWRadosUser user(store, s->user->get_id()); + + op_ret = user.list_buckets(marker, end_marker, read_count, should_get_stats(), buckets, tracer, span); + + if (op_ret < 0) { + /* hmm.. something wrong here.. the user was authenticated, so it + should exist */ + ldpp_dout(this, 10) << "WARNING: failed on rgw_get_user_buckets uid=" + << s->user->get_id() << dendl; + break; + } + + /* We need to have stats for all our policies - even if a given policy + * isn't actually used in a given account. In such situation its usage + * stats would be simply full of zeros. */ + for (const auto& policy : store->svc()->zone->get_zonegroup().placement_targets) { + policies_stats.emplace(policy.second.name, + decltype(policies_stats)::mapped_type()); + } + + std::map& m = buckets.get_buckets(); + for (const auto& kv : m) { + const auto& bucket = kv.second; + + global_stats.bytes_used += bucket->get_size(); + global_stats.bytes_used_rounded += bucket->get_size_rounded(); + global_stats.objects_count += bucket->get_count(); + + /* operator[] still can create a new entry for storage policy seen + * for first time. 
*/ + auto& policy_stats = policies_stats[bucket->get_placement_rule().to_str()]; + policy_stats.bytes_used += bucket->get_size(); + policy_stats.bytes_used_rounded += bucket->get_size_rounded(); + policy_stats.buckets_count++; + policy_stats.objects_count += bucket->get_count(); + } + global_stats.buckets_count += m.size(); + total_count += m.size(); + + done = (m.size() < read_count || (limit >= 0 && total_count >= (uint64_t)limit)); + + if (!started) { + send_response_begin(buckets.count() > 0, tracer, span, response_span); + started = true; + } + + if (read_count > 0 && + !m.empty()) { + map::reverse_iterator riter = m.rbegin(); + marker = riter->first; + + handle_listing_chunk(std::move(buckets), tracer, span); + } + } while (is_truncated && !done); + +send_end: + if (!started) { + send_response_begin(false, tracer, span, response_span); + } + send_response_end(std::move(response_span), parent_span); +} + + + void RGWListBuckets::execute() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc RGWListBuckets::execute", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_op.cc RGWListBuckets::execute"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif bool done; bool started = false; uint64_t total_count = 0; @@ -2525,7 +3436,7 @@ void RGWGetUsage::execute() op_ret = get_params(); if (op_ret < 0) return; - + if (!start_date.empty()) { op_ret = utime_t::parse_date(start_date, &start_epoch, NULL); if (op_ret < 0) { @@ -2533,7 +3444,7 @@ void RGWGetUsage::execute() return; } } - + if (!end_date.empty()) { op_ret = utime_t::parse_date(end_date, &end_epoch, NULL); if (op_ret < 0) { @@ -2541,13 +3452,13 @@ void RGWGetUsage::execute() return; } } - + uint32_t max_entries = 1000; bool is_truncated = true; RGWUsageIter usage_iter; - + while (is_truncated) { op_ret = store->getRados()->read_usage(s->user->get_id(), s->bucket_name, 
start_epoch, end_epoch, max_entries, &is_truncated, usage_iter, usage); @@ -2559,7 +3470,7 @@ void RGWGetUsage::execute() if (op_ret < 0) { return; - } + } } op_ret = rgw_user_sync_all_stats(store, s->user->get_id()); @@ -2579,7 +3490,7 @@ void RGWGetUsage::execute() ldpp_dout(this, 0) << "ERROR: can't read user header" << dendl; return; } - + return; } @@ -2819,7 +3730,7 @@ void RGWDeleteBucketWebsite::execute() bufferlist in_data; op_ret = forward_request_to_master(s, nullptr, store, in_data, nullptr); if (op_ret < 0) { - ldpp_dout(this, 0) << "NOTICE: forward_to_master failed on bucket=" << s->bucket.name + ldpp_dout(this, 0) << "NOTICE: forward_to_master failed on bucket=" << s->bucket.name << "returned err=" << op_ret << dendl; return; } @@ -2867,6 +3778,16 @@ void RGWStatBucket::execute() int RGWListBucket::verify_permission() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc RGWListBucket::verify_permission", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_op.cc RGWListBucket::verify_permission"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif op_ret = get_params(); if (op_ret < 0) { return op_ret; @@ -2890,6 +3811,32 @@ int RGWListBucket::verify_permission() return 0; } +int RGWListBucket::verify_permission(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWListBucket::verify_permission", parent_span); + op_ret = get_params(tracer, span); + if (op_ret < 0) { + return op_ret; + } + if (!prefix.empty()) + s->env.emplace("s3:prefix", prefix); + + if (!delimiter.empty()) + s->env.emplace("s3:delimiter", delimiter); + + s->env.emplace("s3:max-keys", std::to_string(max)); + + if (!verify_bucket_permission(this, + s, + list_versions ? 
+ rgw::IAM::s3ListBucketVersions : + rgw::IAM::s3ListBucket, tracer, span)) { + return -EACCES; + } + + return 0; +} + int RGWListBucket::parse_max_keys() { // Bound max value of max-keys to configured value for security @@ -2901,13 +3848,34 @@ int RGWListBucket::parse_max_keys() default_max); } +int RGWListBucket::parse_max_keys(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWListBucket::parse_max_keys", parent_span); + return RGWListBucket::parse_max_keys(); +} + void RGWListBucket::pre_exec() { rgw_bucket_object_pre_exec(s); } +void RGWListBucket::pre_exec(Jager_Tracer& tracer,const Span& parent_span) +{ + rgw_bucket_object_pre_exec(s, tracer, parent_span); +} + void RGWListBucket::execute() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc RGWListBucket::execute", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_op.cc RGWListBucket::execute"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (!s->bucket_exists) { op_ret = -ERR_NO_SUCH_BUCKET; return; @@ -2943,6 +3911,44 @@ void RGWListBucket::execute() } } +void RGWListBucket::execute(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWListBucket::execute",parent_span); + if (!s->bucket_exists) { + op_ret = -ERR_NO_SUCH_BUCKET; + return; + } + + if (allow_unordered && !delimiter.empty()) { + ldpp_dout(this, 0) << + "ERROR: unordered bucket listing requested with a delimiter" << dendl; + op_ret = -EINVAL; + return; + } + + if (need_container_stats()) { + op_ret = bucket->update_container_stats(tracer, span); + } + + RGWRados::Bucket target(store->getRados(), s->bucket_info); + if (shard_id >= 0) { + target.set_shard_id(shard_id); + } + RGWRados::Bucket::List list_op(&target); + + list_op.params.prefix = prefix; + list_op.params.delim = delimiter; + 
list_op.params.marker = marker; + list_op.params.end_marker = end_marker; + list_op.params.list_versions = list_versions; + list_op.params.allow_unordered = allow_unordered; + + op_ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated, tracer, span, s->yield); + if (op_ret >= 0) { + next_marker = list_op.get_next_marker(); + } +} + int RGWGetBucketLogging::verify_permission() { return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketLogging); @@ -2999,6 +4005,53 @@ int RGWCreateBucket::verify_permission() return 0; } +int RGWCreateBucket::verify_permission(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWCreateBucket::verify_permission",parent_span); + /* This check is mostly needed for S3 that doesn't support account ACL. + * Swift doesn't allow to delegate any permission to an anonymous user, + * so it will become an early exit in such case. */ + if (s->auth.identity->is_anonymous()) { + return -EACCES; + } + + rgw_bucket bucket; + bucket.name = s->bucket_name; + bucket.tenant = s->bucket_tenant; + ARN arn = ARN(bucket); + if (!verify_user_permission(this, s, arn, rgw::IAM::s3CreateBucket, tracer, span)) { + return -EACCES; + } + + if (s->user->get_tenant() != s->bucket_tenant) { + ldpp_dout(this, 10) << "user cannot create a bucket in a different tenant" + << " (user_id.tenant=" << s->user->get_tenant() + << " requested=" << s->bucket_tenant << ")" + << dendl; + return -EACCES; + } + if (s->user->get_max_buckets() < 0) { + return -EPERM; + } + + if (s->user->get_max_buckets()) { + rgw::sal::RGWBucketList buckets; + string marker; + op_ret = rgw_read_user_buckets(store, s->user->get_id(), buckets, + marker, string(), s->user->get_max_buckets(),tracer,span, + false); + if (op_ret < 0) { + return op_ret; + } + + if ((int)buckets.count() >= s->user->get_max_buckets()) { + return -ERR_TOO_MANY_BUCKETS; + } + } + + return 0; +} + int forward_request_to_master(struct req_state *s, obj_version *objv, 
rgw::sal::RGWRadosStore *store, bufferlist& in_data, JSONParser *jp, req_info *forward_info) @@ -3030,6 +4083,12 @@ void RGWCreateBucket::pre_exec() rgw_bucket_object_pre_exec(s); } +void RGWCreateBucket::pre_exec(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWCreateBucket::pre_exec",parent_span); + rgw_bucket_object_pre_exec(s, tracer , span); +} + static void prepare_add_del_attrs(const map& orig_attrs, map& out_attrs, map& out_rmattrs) @@ -3155,6 +4214,15 @@ static int filter_out_quota_info(std::map& add_attrs, return 0; } +static int filter_out_quota_info(std::map& add_attrs, + const std::set& rmattr_names, + RGWQuotaInfo& quota, Jager_Tracer& tracer, const Span& parent_span, + bool * quota_extracted = nullptr) +{ + Span span = tracer.child_span("rgw_op.cc filter_out_quota_info", parent_span); + return filter_out_quota_info(add_attrs, rmattr_names, quota, quota_extracted); +} + static void filter_out_website(std::map& add_attrs, const std::set& rmattr_names, @@ -3468,362 +4536,1292 @@ void RGWCreateBucket::execute() } } -int RGWDeleteBucket::verify_permission() -{ - if (!verify_bucket_permission(this, s, rgw::IAM::s3DeleteBucket)) { - return -EACCES; - } - - return 0; -} -void RGWDeleteBucket::pre_exec() +void RGWCreateBucket::execute(Jager_Tracer& tracer,const Span& parent_span) { - rgw_bucket_object_pre_exec(s); -} + Span span=tracer.child_span("rgw_op.cc RGWCreateBucket::execute",parent_span); + RGWAccessControlPolicy old_policy(s->cct); + buffer::list aclbl; + buffer::list corsbl; + bool existed; + string bucket_name = rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name); + rgw_raw_obj obj(store->svc()->zone->get_zone_params().domain_root, bucket_name); + obj_version objv, *pobjv = NULL; -void RGWDeleteBucket::execute() -{ - if (s->bucket_name.empty()) { - op_ret = -EINVAL; + op_ret = get_params(tracer, span); + if (op_ret < 0) return; - } - if (!s->bucket_exists) { - ldpp_dout(this, 0) << "ERROR: bucket 
" << s->bucket_name << " not found" << dendl; - op_ret = -ERR_NO_SUCH_BUCKET; + if (!relaxed_region_enforcement && + !location_constraint.empty() && + !store->svc()->zone->has_zonegroup_api(location_constraint)) { + ldpp_dout(this, 0) << "location constraint (" << location_constraint << ")" + << " can't be found." << dendl; + op_ret = -ERR_INVALID_LOCATION_CONSTRAINT; + s->err.message = "The specified location-constraint is not valid"; + return; + } + + if (!relaxed_region_enforcement && !store->svc()->zone->get_zonegroup().is_master_zonegroup() && !location_constraint.empty() && + store->svc()->zone->get_zonegroup().api_name != location_constraint) { + ldpp_dout(this, 0) << "location constraint (" << location_constraint << ")" + << " doesn't match zonegroup" << " (" << store->svc()->zone->get_zonegroup().api_name << ")" + << dendl; + op_ret = -ERR_INVALID_LOCATION_CONSTRAINT; + s->err.message = "The specified location-constraint is not valid"; return; } - RGWObjVersionTracker ot; - ot.read_version = s->bucket_ep_objv; - if (s->system_request) { - string tag = s->info.args.get(RGW_SYS_PARAM_PREFIX "tag"); - string ver_str = s->info.args.get(RGW_SYS_PARAM_PREFIX "ver"); - if (!tag.empty()) { - ot.read_version.tag = tag; - uint64_t ver; - string err; - ver = strict_strtol(ver_str.c_str(), 10, &err); - if (!err.empty()) { - ldpp_dout(this, 0) << "failed to parse ver param" << dendl; - op_ret = -EINVAL; + const auto& zonegroup = store->svc()->zone->get_zonegroup(); + if (!placement_rule.name.empty() && + !zonegroup.placement_targets.count(placement_rule.name)) { + ldpp_dout(this, 0) << "placement target (" << placement_rule.name << ")" + << " doesn't exist in the placement targets of zonegroup" + << " (" << store->svc()->zone->get_zonegroup().api_name << ")" << dendl; + op_ret = -ERR_INVALID_LOCATION_CONSTRAINT; + s->err.message = "The specified placement target does not exist"; + return; + } + + /* we need to make sure we read bucket info, it's not read before for 
this + * specific request */ + s->bucket.tenant = s->bucket_tenant; + s->bucket.name = s->bucket_name; + rgw::sal::RGWBucket* bucket = NULL; + op_ret = store->get_bucket(*s->user, s->bucket, &bucket); + if (op_ret < 0 && op_ret != -ENOENT) + return; + s->bucket_exists = (op_ret != -ENOENT); + + s->bucket_owner.set_id(s->user->get_id()); + s->bucket_owner.set_name(s->user->get_display_name()); + if (s->bucket_exists) { + s->bucket_info = bucket->get_info(); + s->bucket_attrs = bucket->get_attrs(); + delete bucket; + int r = rgw_op_get_bucket_policy_from_attr(s->cct, store, s->bucket_info, + s->bucket_attrs, &old_policy, tracer, span); + if (r >= 0) { + if (old_policy.get_owner().get_id().compare(s->user->get_id()) != 0) { + op_ret = -EEXIST; return; } - ot.read_version.ver = ver; } } - op_ret = store->ctl()->bucket->sync_user_stats(s->user->get_id(), s->bucket_info); - if ( op_ret < 0) { - ldpp_dout(this, 1) << "WARNING: failed to sync user stats before bucket delete: op_ret= " << op_ret << dendl; - } - - op_ret = store->getRados()->check_bucket_empty(s->bucket_info, s->yield); - if (op_ret < 0) { - return; - } + RGWBucketInfo master_info; + rgw_bucket *pmaster_bucket; + uint32_t *pmaster_num_shards; + real_time creation_time; if (!store->svc()->zone->is_meta_master()) { - bufferlist in_data; - op_ret = forward_request_to_master(s, &ot.read_version, store, in_data, - NULL); + JSONParser jp; + op_ret = forward_request_to_master(s, NULL, store, in_data, &jp); if (op_ret < 0) { - if (op_ret == -ENOENT) { - /* adjust error, we want to return with NoSuchBucket and not - * NoSuchKey */ - op_ret = -ERR_NO_SUCH_BUCKET; - } return; } + + JSONDecoder::decode_json("entry_point_object_ver", ep_objv, &jp); + JSONDecoder::decode_json("object_ver", objv, &jp); + JSONDecoder::decode_json("bucket_info", master_info, &jp); + ldpp_dout(this, 20) << "parsed: objv.tag=" << objv.tag << " objv.ver=" << objv.ver << dendl; + ldpp_dout(this, 20) << "got creation time: << " << 
master_info.creation_time << dendl; + pmaster_bucket= &master_info.bucket; + creation_time = master_info.creation_time; + pmaster_num_shards = &master_info.num_shards; + pobjv = &objv; + obj_lock_enabled = master_info.obj_lock_enabled(); + } else { + pmaster_bucket = NULL; + pmaster_num_shards = NULL; } - string prefix, delimiter; + string zonegroup_id; - if (s->prot_flags & RGW_REST_SWIFT) { - string path_args; - path_args = s->info.args.get("path"); - if (!path_args.empty()) { - if (!delimiter.empty() || !prefix.empty()) { - op_ret = -EINVAL; - return; - } - prefix = path_args; - delimiter="/"; + if (s->system_request) { + zonegroup_id = s->info.args.get(RGW_SYS_PARAM_PREFIX "zonegroup"); + if (zonegroup_id.empty()) { + zonegroup_id = store->svc()->zone->get_zonegroup().get_id(); } + } else { + zonegroup_id = store->svc()->zone->get_zonegroup().get_id(); } - op_ret = abort_bucket_multiparts(store, s->cct, s->bucket_info, prefix, delimiter); - - if (op_ret < 0) { - return; + if (s->bucket_exists) { + rgw_placement_rule selected_placement_rule; + rgw_bucket bucket; + bucket.tenant = s->bucket_tenant; + bucket.name = s->bucket_name; + op_ret = store->svc()->zone->select_bucket_placement(s->user->get_info(), + zonegroup_id, + placement_rule, + &selected_placement_rule, nullptr); + if (selected_placement_rule != s->bucket_info.placement_rule) { + op_ret = -EEXIST; + return; + } } - op_ret = store->getRados()->delete_bucket(s->bucket_info, ot, s->yield, false); + /* Encode special metadata first as we're using std::map::emplace under + * the hood. This method will add the new items only if the map doesn't + * contain such keys yet. */ + policy.encode(aclbl); + emplace_attr(RGW_ATTR_ACL, std::move(aclbl)); - if (op_ret == -ECANCELED) { - // lost a race, either with mdlog sync or another delete bucket operation. 
- // in either case, we've already called ctl.bucket->unlink_bucket() - op_ret = 0; - return; + if (has_cors) { + cors_config.encode(corsbl); + emplace_attr(RGW_ATTR_CORS, std::move(corsbl)); } - if (op_ret == 0) { - op_ret = store->ctl()->bucket->unlink_bucket(s->bucket_info.owner, - s->bucket, s->yield, false); + RGWQuotaInfo quota_info; + const RGWQuotaInfo * pquota_info = nullptr; + if (need_metadata_upload()) { + /* It's supposed that following functions WILL NOT change any special + * attributes (like RGW_ATTR_ACL) if they are already present in attrs. */ + op_ret = rgw_get_request_metadata(s->cct, s->info, attrs, tracer, span, false); if (op_ret < 0) { - ldpp_dout(this, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret - << dendl; + return; } - } -} - -int RGWPutObj::verify_permission() -{ - if (! copy_source.empty()) { + prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs); + populate_with_generic_attrs(s, attrs); - RGWAccessControlPolicy cs_acl(s->cct); - boost::optional policy; - map cs_attrs; - rgw_bucket cs_bucket(copy_source_bucket_info.bucket); - rgw_obj_key cs_object(copy_source_object_name, copy_source_version_id); + op_ret = filter_out_quota_info(attrs, rmattr_names, quota_info, tracer, span); + if (op_ret < 0) { + return; + } else { + pquota_info = "a_info; + } - rgw_obj obj(cs_bucket, cs_object); - store->getRados()->set_atomic(s->obj_ctx, obj); - store->getRados()->set_prefetch_data(s->obj_ctx, obj); + /* Web site of Swift API. */ + filter_out_website(attrs, rmattr_names, s->bucket_info.website_conf); + s->bucket_info.has_website = !s->bucket_info.website_conf.is_empty(); + } - /* check source object permissions */ - if (read_obj_policy(store, s, copy_source_bucket_info, cs_attrs, &cs_acl, nullptr, - policy, cs_bucket, cs_object) < 0) { - return -EACCES; - } + s->bucket.tenant = s->bucket_tenant; /* ignored if bucket exists */ + s->bucket.name = s->bucket_name; - /* admin request overrides permission checks */ - if (! 
s->auth.identity->is_admin_of(cs_acl.get_owner().get_id())) { - if (policy || ! s->iam_user_policies.empty()) { - auto usr_policy_res = Effect::Pass; - for (auto& user_policy : s->iam_user_policies) { - if (usr_policy_res = user_policy.eval(s->env, *s->auth.identity, - cs_object.instance.empty() ? - rgw::IAM::s3GetObject : - rgw::IAM::s3GetObjectVersion, - rgw::ARN(obj)); usr_policy_res == Effect::Deny) - return -EACCES; - else if (usr_policy_res == Effect::Allow) - break; - } - rgw::IAM::Effect e = Effect::Pass; - if (policy) { - e = policy->eval(s->env, *s->auth.identity, - cs_object.instance.empty() ? - rgw::IAM::s3GetObject : - rgw::IAM::s3GetObjectVersion, - rgw::ARN(obj)); + /* Handle updates of the metadata for Swift's object versioning. */ + if (swift_ver_location) { + s->bucket_info.swift_ver_location = *swift_ver_location; + s->bucket_info.swift_versioning = (! swift_ver_location->empty()); } - if (e == Effect::Deny) { - return -EACCES; - } else if (usr_policy_res == Effect::Pass && e == Effect::Pass && - !cs_acl.verify_permission(this, *s->auth.identity, s->perm_mask, - RGW_PERM_READ)) { - return -EACCES; - } - } else if (!cs_acl.verify_permission(this, *s->auth.identity, s->perm_mask, - RGW_PERM_READ)) { - return -EACCES; - } - } + if (obj_lock_enabled) { + info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; } - if (s->bucket_access_conf && s->bucket_access_conf->block_public_acls()) { - if (s->canned_acl.compare("public-read") || - s->canned_acl.compare("public-read-write") || - s->canned_acl.compare("authenticated-read")) - return -EACCES; - } - auto op_ret = get_params(); - if (op_ret < 0) { - ldpp_dout(this, 20) << "get_params() returned ret=" << op_ret << dendl; - return op_ret; - } + op_ret = store->getRados()->create_bucket(s->user->get_info(), s->bucket, zonegroup_id, + placement_rule, s->bucket_info.swift_ver_location, + pquota_info, attrs, + info, pobjv, &ep_objv, creation_time, + pmaster_bucket, pmaster_num_shards, tracer, span, true); + 
/* continue if EEXIST and create_bucket will fail below. this way we can + * recover from a partial create by retrying it. */ + ldpp_dout(this, 20) << "rgw_create_bucket returned ret=" << op_ret << " bucket=" << s->bucket << dendl; - if (s->iam_policy || ! s->iam_user_policies.empty()) { - rgw_add_grant_to_iam_environment(s->env, s); + if (op_ret && op_ret != -EEXIST) + return; - rgw_add_to_iam_environment(s->env, "s3:x-amz-acl", s->canned_acl); + existed = (op_ret == -EEXIST); - if (obj_tags != nullptr && obj_tags->count() > 0){ - auto tags = obj_tags->get_tags(); - for (const auto& kv: tags){ - rgw_add_to_iam_environment(s->env, "s3:RequestObjectTag/"+kv.first, kv.second); + if (existed) { + /* bucket already existed, might have raced with another bucket creation, or + * might be partial bucket creation that never completed. Read existing bucket + * info, verify that the reported bucket owner is the current user. + * If all is ok then update the user's list of buckets. + * Otherwise inform client about a name conflict. + */ + if (info.owner.compare(s->user->get_id()) != 0) { + op_ret = -EEXIST; + return; + } + s->bucket = info.bucket; + } + + op_ret = store->ctl()->bucket->link_bucket(s->user->get_id(), s->bucket, + info.creation_time, s->yield, tracer, span, false); + if (op_ret && !existed && op_ret != -EEXIST) { + /* if it exists (or previously existed), don't remove it! */ + op_ret = store->ctl()->bucket->unlink_bucket(s->user->get_id(), s->bucket, s->yield); + if (op_ret < 0) { + ldpp_dout(this, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret + << dendl; + } + } else if (op_ret == -EEXIST || (op_ret == 0 && existed)) { + op_ret = -ERR_BUCKET_EXISTS; + } + + if (need_metadata_upload() && existed) { + /* OK, it looks we lost race with another request. As it's required to + * handle metadata fusion and upload, the whole operation becomes very + * similar in nature to PutMetadataBucket. 
However, as the attrs may + * changed in the meantime, we have to refresh. */ + short tries = 0; + do { + RGWBucketInfo binfo; + map battrs; + + op_ret = store->getRados()->get_bucket_info(store->svc(), s->bucket_tenant, s->bucket_name, + binfo, nullptr, s->yield, &battrs); + if (op_ret < 0) { + return; + } else if (binfo.owner.compare(s->user->get_id()) != 0) { + /* New bucket doesn't belong to the account we're operating on. */ + op_ret = -EEXIST; + return; + } else { + s->bucket_info = binfo; + s->bucket_attrs = battrs; + } + + attrs.clear(); + + op_ret = rgw_get_request_metadata(s->cct, s->info, attrs, false); + if (op_ret < 0) { + return; + } + prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs); + populate_with_generic_attrs(s, attrs); + op_ret = filter_out_quota_info(attrs, rmattr_names, s->bucket_info.quota); + if (op_ret < 0) { + return; } + + /* Handle updates of the metadata for Swift's object versioning. */ + if (swift_ver_location) { + s->bucket_info.swift_ver_location = *swift_ver_location; + s->bucket_info.swift_versioning = (! swift_ver_location->empty()); + } + + /* Web site of Swift API. */ + filter_out_website(attrs, rmattr_names, s->bucket_info.website_conf); + s->bucket_info.has_website = !s->bucket_info.website_conf.is_empty(); + + /* This will also set the quota on the bucket. */ + op_ret = store->ctl()->bucket->set_bucket_instance_attrs(s->bucket_info, attrs, + &s->bucket_info.objv_tracker, + s->yield); + } while (op_ret == -ECANCELED && tries++ < 20); + + /* Restore the proper return code. 
*/ + if (op_ret >= 0) { + op_ret = -ERR_BUCKET_EXISTS; } + } +} - constexpr auto encrypt_attr = "x-amz-server-side-encryption"; - constexpr auto s3_encrypt_attr = "s3:x-amz-server-side-encryption"; - auto enc_header = s->info.x_meta_map.find(encrypt_attr); - if (enc_header != s->info.x_meta_map.end()){ - rgw_add_to_iam_environment(s->env, s3_encrypt_attr, enc_header->second); +int RGWDeleteBucket::verify_permission() +{ + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc RGWDeleteBucket::verify_permission", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_op.cc RGWDeleteBucket::verify_permission"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + if (!verify_bucket_permission(this, s, rgw::IAM::s3DeleteBucket)) { + return -EACCES; + } + + return 0; +} + +int RGWDeleteBucket::verify_permission(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWDeleteBucket::verify_permission", parent_span); + if (!verify_bucket_permission(this, s, rgw::IAM::s3DeleteBucket, tracer, span)) { + return -EACCES; + } + + return 0; +} + +void RGWDeleteBucket::pre_exec() +{ + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc RGWDeleteBucket::pre_exec", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_op.cc RGWDeleteBucket::pre_exec"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + rgw_bucket_object_pre_exec(s); +} + +void RGWDeleteBucket::pre_exec(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWDeleteBucket::pre_exec", parent_span); + rgw_bucket_object_pre_exec(s, tracer, span); +} + +void RGWDeleteBucket::execute() +{ + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && 
!global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.cc RGWDeleteBucket::exec", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_op.cc RGWDeleteBucket::exec"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + if (s->bucket_name.empty()) { + op_ret = -EINVAL; + return; + } + + if (!s->bucket_exists) { + ldpp_dout(this, 0) << "ERROR: bucket " << s->bucket_name << " not found" << dendl; + op_ret = -ERR_NO_SUCH_BUCKET; + return; + } + RGWObjVersionTracker ot; + ot.read_version = s->bucket_ep_objv; + + if (s->system_request) { + string tag = s->info.args.get(RGW_SYS_PARAM_PREFIX "tag"); + string ver_str = s->info.args.get(RGW_SYS_PARAM_PREFIX "ver"); + if (!tag.empty()) { + ot.read_version.tag = tag; + uint64_t ver; + string err; + ver = strict_strtol(ver_str.c_str(), 10, &err); + if (!err.empty()) { + ldpp_dout(this, 0) << "failed to parse ver param" << dendl; + op_ret = -EINVAL; + return; + } + ot.read_version.ver = ver; } + } - constexpr auto kms_attr = "x-amz-server-side-encryption-aws-kms-key-id"; - constexpr auto s3_kms_attr = "s3:x-amz-server-side-encryption-aws-kms-key-id"; - auto kms_header = s->info.x_meta_map.find(kms_attr); - if (kms_header != s->info.x_meta_map.end()){ - rgw_add_to_iam_environment(s->env, s3_kms_attr, kms_header->second); + op_ret = store->ctl()->bucket->sync_user_stats(s->user->get_id(), s->bucket_info); + if ( op_ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to sync user stats before bucket delete: op_ret= " << op_ret << dendl; + } + + op_ret = store->getRados()->check_bucket_empty(s->bucket_info, s->yield); + if (op_ret < 0) { + return; + } + + if (!store->svc()->zone->is_meta_master()) { + bufferlist in_data; + op_ret = forward_request_to_master(s, &ot.read_version, store, in_data, + NULL); + if (op_ret < 0) { + if (op_ret == -ENOENT) { + /* adjust error, we want to return with NoSuchBucket and not + * NoSuchKey */ + op_ret = -ERR_NO_SUCH_BUCKET; + } + 
return; } + } - auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env, - boost::none, - rgw::IAM::s3PutObject, - rgw_obj(s->bucket, s->object)); - if (usr_policy_res == Effect::Deny) - return -EACCES; + string prefix, delimiter; - rgw::IAM::Effect e = Effect::Pass; - if (s->iam_policy) { - e = s->iam_policy->eval(s->env, *s->auth.identity, - rgw::IAM::s3PutObject, - rgw_obj(s->bucket, s->object)); + if (s->prot_flags & RGW_REST_SWIFT) { + string path_args; + path_args = s->info.args.get("path"); + if (!path_args.empty()) { + if (!delimiter.empty() || !prefix.empty()) { + op_ret = -EINVAL; + return; + } + prefix = path_args; + delimiter="/"; } - if (e == Effect::Allow) { - return 0; - } else if (e == Effect::Deny) { - return -EACCES; - } else if (usr_policy_res == Effect::Allow) { - return 0; + } + + op_ret = abort_bucket_multiparts(store, s->cct, s->bucket_info, prefix, delimiter); + + if (op_ret < 0) { + return; + } + + op_ret = store->getRados()->delete_bucket(s->bucket_info, ot, s->yield, false); + + if (op_ret == -ECANCELED) { + // lost a race, either with mdlog sync or another delete bucket operation. 
+ // in either case, we've already called ctl.bucket->unlink_bucket() + op_ret = 0; + return; + } + + if (op_ret == 0) { + op_ret = store->ctl()->bucket->unlink_bucket(s->bucket_info.owner, + s->bucket, s->yield, false); + if (op_ret < 0) { + ldpp_dout(this, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret + << dendl; + } + } +} + +void RGWDeleteBucket::execute(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWDeleteBucket::execute", parent_span); + if (s->bucket_name.empty()) { + op_ret = -EINVAL; + return; + } + + if (!s->bucket_exists) { + ldpp_dout(this, 0) << "ERROR: bucket " << s->bucket_name << " not found" << dendl; + op_ret = -ERR_NO_SUCH_BUCKET; + return; + } + RGWObjVersionTracker ot; + ot.read_version = s->bucket_ep_objv; + + if (s->system_request) { + string tag = s->info.args.get(RGW_SYS_PARAM_PREFIX "tag"); + string ver_str = s->info.args.get(RGW_SYS_PARAM_PREFIX "ver"); + if (!tag.empty()) { + ot.read_version.tag = tag; + uint64_t ver; + string err; + ver = strict_strtol(ver_str.c_str(), 10, &err); + if (!err.empty()) { + ldpp_dout(this, 0) << "failed to parse ver param" << dendl; + op_ret = -EINVAL; + return; + } + ot.read_version.ver = ver; + } + } + + op_ret = store->ctl()->bucket->sync_user_stats(s->user->get_id(), s->bucket_info, tracer, span); + if ( op_ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to sync user stats before bucket delete: op_ret= " << op_ret << dendl; + } + + op_ret = store->getRados()->check_bucket_empty(s->bucket_info, s->yield, tracer, span); + if (op_ret < 0) { + return; + } + + if (!store->svc()->zone->is_meta_master()) { + bufferlist in_data; + op_ret = forward_request_to_master(s, &ot.read_version, store, in_data, + NULL); + if (op_ret < 0) { + if (op_ret == -ENOENT) { + /* adjust error, we want to return with NoSuchBucket and not + * NoSuchKey */ + op_ret = -ERR_NO_SUCH_BUCKET; + } + return; + } + } + + string prefix, delimiter; + + if (s->prot_flags & 
RGW_REST_SWIFT) { + string path_args; + path_args = s->info.args.get("path"); + if (!path_args.empty()) { + if (!delimiter.empty() || !prefix.empty()) { + op_ret = -EINVAL; + return; + } + prefix = path_args; + delimiter="/"; + } + } + + op_ret = abort_bucket_multiparts(store, s->cct, s->bucket_info, prefix, delimiter, tracer, span); + + if (op_ret < 0) { + return; + } + + op_ret = store->getRados()->delete_bucket(s->bucket_info, ot, s->yield, tracer, span, false); + + if (op_ret == -ECANCELED) { + // lost a race, either with mdlog sync or another delete bucket operation. + // in either case, we've already called ctl.bucket->unlink_bucket() + op_ret = 0; + return; + } + + if (op_ret == 0) { + op_ret = store->ctl()->bucket->unlink_bucket(s->bucket_info.owner, + s->bucket, s->yield, tracer, span, false); + if (op_ret < 0) { + ldpp_dout(this, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret + << dendl; + } + } +} + +int RGWPutObj::verify_permission() +{ + if (! copy_source.empty()) { + + RGWAccessControlPolicy cs_acl(s->cct); + boost::optional policy; + map cs_attrs; + rgw_bucket cs_bucket(copy_source_bucket_info.bucket); + rgw_obj_key cs_object(copy_source_object_name, copy_source_version_id); + + rgw_obj obj(cs_bucket, cs_object); + store->getRados()->set_atomic(s->obj_ctx, obj); + store->getRados()->set_prefetch_data(s->obj_ctx, obj); + + /* check source object permissions */ + if (read_obj_policy(store, s, copy_source_bucket_info, cs_attrs, &cs_acl, nullptr, + policy, cs_bucket, cs_object) < 0) { + return -EACCES; + } + + /* admin request overrides permission checks */ + if (! s->auth.identity->is_admin_of(cs_acl.get_owner().get_id())) { + if (policy || ! s->iam_user_policies.empty()) { + auto usr_policy_res = Effect::Pass; + for (auto& user_policy : s->iam_user_policies) { + if (usr_policy_res = user_policy.eval(s->env, *s->auth.identity, + cs_object.instance.empty() ? 
+ rgw::IAM::s3GetObject : + rgw::IAM::s3GetObjectVersion, + rgw::ARN(obj)); usr_policy_res == Effect::Deny) + return -EACCES; + else if (usr_policy_res == Effect::Allow) + break; + } + rgw::IAM::Effect e = Effect::Pass; + if (policy) { + e = policy->eval(s->env, *s->auth.identity, + cs_object.instance.empty() ? + rgw::IAM::s3GetObject : + rgw::IAM::s3GetObjectVersion, + rgw::ARN(obj)); + } + if (e == Effect::Deny) { + return -EACCES; + } else if (usr_policy_res == Effect::Pass && e == Effect::Pass && + !cs_acl.verify_permission(this, *s->auth.identity, s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } + } else if (!cs_acl.verify_permission(this, *s->auth.identity, s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } + } + } + + if (s->bucket_access_conf && s->bucket_access_conf->block_public_acls()) { + if (s->canned_acl.compare("public-read") || + s->canned_acl.compare("public-read-write") || + s->canned_acl.compare("authenticated-read")) + return -EACCES; + } + + auto op_ret = get_params(); + if (op_ret < 0) { + ldpp_dout(this, 20) << "get_params() returned ret=" << op_ret << dendl; + return op_ret; + } + + if (s->iam_policy || ! 
s->iam_user_policies.empty()) { + rgw_add_grant_to_iam_environment(s->env, s); + + rgw_add_to_iam_environment(s->env, "s3:x-amz-acl", s->canned_acl); + + if (obj_tags != nullptr && obj_tags->count() > 0){ + auto tags = obj_tags->get_tags(); + for (const auto& kv: tags){ + rgw_add_to_iam_environment(s->env, "s3:RequestObjectTag/"+kv.first, kv.second); + } + } + + constexpr auto encrypt_attr = "x-amz-server-side-encryption"; + constexpr auto s3_encrypt_attr = "s3:x-amz-server-side-encryption"; + auto enc_header = s->info.x_meta_map.find(encrypt_attr); + if (enc_header != s->info.x_meta_map.end()){ + rgw_add_to_iam_environment(s->env, s3_encrypt_attr, enc_header->second); + } + + constexpr auto kms_attr = "x-amz-server-side-encryption-aws-kms-key-id"; + constexpr auto s3_kms_attr = "s3:x-amz-server-side-encryption-aws-kms-key-id"; + auto kms_header = s->info.x_meta_map.find(kms_attr); + if (kms_header != s->info.x_meta_map.end()){ + rgw_add_to_iam_environment(s->env, s3_kms_attr, kms_header->second); + } + + auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env, + boost::none, + rgw::IAM::s3PutObject, + rgw_obj(s->bucket, s->object)); + if (usr_policy_res == Effect::Deny) + return -EACCES; + + rgw::IAM::Effect e = Effect::Pass; + if (s->iam_policy) { + e = s->iam_policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3PutObject, + rgw_obj(s->bucket, s->object)); + } + if (e == Effect::Allow) { + return 0; + } else if (e == Effect::Deny) { + return -EACCES; + } else if (usr_policy_res == Effect::Allow) { + return 0; + } + } + + if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) { + return -EACCES; + } + + return 0; +} + + + +int RGWPutObj::verify_permission(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWPutObj::verify_permission", parent_span); + if (! 
copy_source.empty()) { + + RGWAccessControlPolicy cs_acl(s->cct); + boost::optional policy; + map cs_attrs; + rgw_bucket cs_bucket(copy_source_bucket_info.bucket); + rgw_obj_key cs_object(copy_source_object_name, copy_source_version_id); + + rgw_obj obj(cs_bucket, cs_object); + store->getRados()->set_atomic(s->obj_ctx, obj); + store->getRados()->set_prefetch_data(s->obj_ctx, obj); + + /* check source object permissions */ + if (read_obj_policy(store, s, copy_source_bucket_info, cs_attrs, &cs_acl, nullptr, + policy, cs_bucket, cs_object) < 0) { + return -EACCES; + } + + /* admin request overrides permission checks */ + if (! s->auth.identity->is_admin_of(cs_acl.get_owner().get_id())) { + if (policy || ! s->iam_user_policies.empty()) { + auto usr_policy_res = Effect::Pass; + for (auto& user_policy : s->iam_user_policies) { + if (usr_policy_res = user_policy.eval(s->env, *s->auth.identity, + cs_object.instance.empty() ? + rgw::IAM::s3GetObject : + rgw::IAM::s3GetObjectVersion, + rgw::ARN(obj)); usr_policy_res == Effect::Deny) + return -EACCES; + else if (usr_policy_res == Effect::Allow) + break; + } + rgw::IAM::Effect e = Effect::Pass; + if (policy) { + e = policy->eval(s->env, *s->auth.identity, + cs_object.instance.empty() ? 
+ rgw::IAM::s3GetObject : + rgw::IAM::s3GetObjectVersion, + rgw::ARN(obj)); + } + if (e == Effect::Deny) { + return -EACCES; + } else if (usr_policy_res == Effect::Pass && e == Effect::Pass && + !cs_acl.verify_permission(this, *s->auth.identity, s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } + } else if (!cs_acl.verify_permission(this, *s->auth.identity, s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } + } + } + + if (s->bucket_access_conf && s->bucket_access_conf->block_public_acls()) { + if (s->canned_acl.compare("public-read") || + s->canned_acl.compare("public-read-write") || + s->canned_acl.compare("authenticated-read")) + return -EACCES; + } + + auto op_ret = get_params(tracer, span); + if (op_ret < 0) { + ldpp_dout(this, 20) << "get_params() returned ret=" << op_ret << dendl; + return op_ret; + } + + if (s->iam_policy || ! s->iam_user_policies.empty()) { + rgw_add_grant_to_iam_environment(s->env, s); + + rgw_add_to_iam_environment(s->env, "s3:x-amz-acl", s->canned_acl); + + if (obj_tags != nullptr && obj_tags->count() > 0){ + auto tags = obj_tags->get_tags(); + for (const auto& kv: tags){ + rgw_add_to_iam_environment(s->env, "s3:RequestObjectTag/"+kv.first, kv.second); + } + } + + constexpr auto encrypt_attr = "x-amz-server-side-encryption"; + constexpr auto s3_encrypt_attr = "s3:x-amz-server-side-encryption"; + auto enc_header = s->info.x_meta_map.find(encrypt_attr); + if (enc_header != s->info.x_meta_map.end()){ + rgw_add_to_iam_environment(s->env, s3_encrypt_attr, enc_header->second); + } + + constexpr auto kms_attr = "x-amz-server-side-encryption-aws-kms-key-id"; + constexpr auto s3_kms_attr = "s3:x-amz-server-side-encryption-aws-kms-key-id"; + auto kms_header = s->info.x_meta_map.find(kms_attr); + if (kms_header != s->info.x_meta_map.end()){ + rgw_add_to_iam_environment(s->env, s3_kms_attr, kms_header->second); + } + + auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env, + boost::none, + rgw::IAM::s3PutObject, + 
rgw_obj(s->bucket, s->object)); + if (usr_policy_res == Effect::Deny) + return -EACCES; + + rgw::IAM::Effect e = Effect::Pass; + if (s->iam_policy) { + e = s->iam_policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3PutObject, + rgw_obj(s->bucket, s->object)); + } + if (e == Effect::Allow) { + return 0; + } else if (e == Effect::Deny) { + return -EACCES; + } else if (usr_policy_res == Effect::Allow) { + return 0; + } + } + + if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE, tracer, span)) { + return -EACCES; + } + + return 0; +} + + +void RGWPutObj::pre_exec(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWPutObj::pre_exec",parent_span); + rgw_bucket_object_pre_exec(s, tracer, span); +} + + +void RGWPutObj::pre_exec() +{ + rgw_bucket_object_pre_exec(s); +} + +class RGWPutObj_CB : public RGWGetObj_Filter +{ + RGWPutObj *op; +public: + explicit RGWPutObj_CB(RGWPutObj *_op) : op(_op) {} + ~RGWPutObj_CB() override {} + + int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) override { + return op->get_data_cb(bl, bl_ofs, bl_len); + } +}; + +int RGWPutObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len) +{ + bufferlist bl_tmp; + bl.begin(bl_ofs).copy(bl_len, bl_tmp); + + bl_aux.append(bl_tmp); + + return bl_len; +} + +int RGWPutObj::get_data(const off_t fst, const off_t lst, bufferlist& bl) +{ + RGWPutObj_CB cb(this); + RGWGetObj_Filter* filter = &cb; + boost::optional decompress; + std::unique_ptr decrypt; + RGWCompressionInfo cs_info; + map attrs; + map::iterator attr_iter; + int ret = 0; + + uint64_t obj_size; + int64_t new_ofs, new_end; + + new_ofs = fst; + new_end = lst; + + rgw_obj_key obj_key(copy_source_object_name, copy_source_version_id); + rgw_obj obj(copy_source_bucket_info.bucket, obj_key); + + RGWRados::Object op_target(store->getRados(), copy_source_bucket_info, *static_cast(s->obj_ctx), obj); + RGWRados::Object::Read read_op(&op_target); + read_op.params.obj_size = &obj_size; + 
read_op.params.attrs = &attrs; + + ret = read_op.prepare(s->yield); + if (ret < 0) + return ret; + + bool need_decompress; + op_ret = rgw_compression_info_from_attrset(attrs, need_decompress, cs_info); + if (op_ret < 0) { + ldpp_dout(s, 0) << "ERROR: failed to decode compression info" << dendl; + return -EIO; + } + + bool partial_content = true; + if (need_decompress) + { + obj_size = cs_info.orig_size; + decompress.emplace(s->cct, &cs_info, partial_content, filter); + filter = &*decompress; + } + + attr_iter = attrs.find(RGW_ATTR_MANIFEST); + op_ret = this->get_decrypt_filter(&decrypt, + filter, + attrs, + attr_iter != attrs.end() ? &(attr_iter->second) : nullptr); + if (decrypt != nullptr) { + filter = decrypt.get(); + } + if (op_ret < 0) { + return ret; + } + + ret = read_op.range_to_ofs(obj_size, new_ofs, new_end); + if (ret < 0) + return ret; + + filter->fixup_range(new_ofs, new_end); + ret = read_op.iterate(new_ofs, new_end, filter, s->yield); + + if (ret >= 0) + ret = filter->flush(); + + bl.claim_append(bl_aux); + + return ret; +} + +// special handling for compression type = "random" with multipart uploads +static CompressorRef get_compressor_plugin(const req_state *s, + const std::string& compression_type) +{ + if (compression_type != "random") { + return Compressor::create(s->cct, compression_type); + } + + bool is_multipart{false}; + const auto& upload_id = s->info.args.get("uploadId", &is_multipart); + + if (!is_multipart) { + return Compressor::create(s->cct, compression_type); + } + + // use a hash of the multipart upload id so all parts use the same plugin + const auto alg = std::hash{}(upload_id) % Compressor::COMP_ALG_LAST; + if (alg == Compressor::COMP_ALG_NONE) { + return nullptr; + } + return Compressor::create(s->cct, alg); +} + + +void RGWPutObj::execute(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWPutObj::execute",parent_span); + char supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1]; + char 
supplied_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; + MD5 hash; + bufferlist bl, aclbl, bs; + int len; + + off_t fst; + off_t lst; + + bool need_calc_md5 = (dlo_manifest == NULL) && (slo_info == NULL); + perfcounter->inc(l_rgw_put); + // report latency on return + auto put_lat = make_scope_guard([&] { + perfcounter->tinc(l_rgw_put_lat, s->time_elapsed()); + }); + + op_ret = -EINVAL; + if (s->object.empty()) { + return; + } + + if (!s->bucket_exists) { + op_ret = -ERR_NO_SUCH_BUCKET; + return; + } + + + op_ret = get_system_versioning_params(s, &olh_epoch, &version_id); + if (op_ret < 0) { + ldpp_dout(this, 20) << "get_system_versioning_params() returned ret=" + << op_ret << dendl; + return; + } + + if (supplied_md5_b64) { + need_calc_md5 = true; + + ldpp_dout(this, 15) << "supplied_md5_b64=" << supplied_md5_b64 << dendl; + op_ret = ceph_unarmor(supplied_md5_bin, &supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1], + supplied_md5_b64, supplied_md5_b64 + strlen(supplied_md5_b64)); + ldpp_dout(this, 15) << "ceph_armor ret=" << op_ret << dendl; + if (op_ret != CEPH_CRYPTO_MD5_DIGESTSIZE) { + op_ret = -ERR_INVALID_DIGEST; + return; + } + + buf_to_hex((const unsigned char *)supplied_md5_bin, CEPH_CRYPTO_MD5_DIGESTSIZE, supplied_md5); + ldpp_dout(this, 15) << "supplied_md5=" << supplied_md5 << dendl; + } + + if (!chunked_upload) { /* with chunked upload we don't know how big is the upload. 
+ we also check sizes at the end anyway */ + op_ret = store->getRados()->check_quota(s->bucket_owner.get_id(), s->bucket, + user_quota, bucket_quota, s->content_length, tracer, span); + if (op_ret < 0) { + ldpp_dout(this, 20) << "check_quota() returned ret=" << op_ret << dendl; + return; + } + } + + if (supplied_etag) { + strncpy(supplied_md5, supplied_etag, sizeof(supplied_md5) - 1); + supplied_md5[sizeof(supplied_md5) - 1] = '\0'; + } + + const bool multipart = !multipart_upload_id.empty(); + auto& obj_ctx = *static_cast(s->obj_ctx); + rgw_obj obj{s->bucket, s->object}; + + /* Handle object versioning of Swift API. */ + if (! multipart) { + op_ret = store->getRados()->swift_versioning_copy(obj_ctx, + s->bucket_owner.get_id(), + s->bucket_info, + obj, + this, + s->yield, tracer, span); + if (op_ret < 0) { + return; + } + } + + // create the object processor + auto aio = rgw::make_throttle(s->cct->_conf->rgw_put_obj_min_window_size, + s->yield, tracer, span); + using namespace rgw::putobj; + constexpr auto max_processor_size = std::max({sizeof(MultipartObjectProcessor), + sizeof(AtomicObjectProcessor), + sizeof(AppendObjectProcessor)}); + ceph::static_ptr processor; + + rgw_placement_rule *pdest_placement; + + multipart_upload_info upload_info; + if (multipart) { + RGWMPObj mp(s->object.name, multipart_upload_id); + + op_ret = get_multipart_info(store, s, mp.get_meta(), nullptr, nullptr, &upload_info); + if (op_ret < 0) { + if (op_ret != -ENOENT) { + ldpp_dout(this, 0) << "ERROR: get_multipart_info returned " << op_ret << ": " << cpp_strerror(-op_ret) << dendl; + } else {// -ENOENT: raced with upload complete/cancel, no need to spam log + ldpp_dout(this, 20) << "failed to get multipart info (returned " << op_ret << ": " << cpp_strerror(-op_ret) << "): probably raced with upload complete / cancel" << dendl; + } + return; + } + pdest_placement = &upload_info.dest_placement; + ldpp_dout(this, 20) << "dest_placement for part=" << upload_info.dest_placement << dendl; + 
processor.emplace( + &*aio, store, s->bucket_info, pdest_placement, + s->owner.get_id(), obj_ctx, obj, + multipart_upload_id, multipart_part_num, multipart_part_str, + this, s->yield); + } else if(append) { + if (s->bucket_info.versioned()) { + op_ret = -ERR_INVALID_BUCKET_STATE; + return; + } + pdest_placement = &s->dest_placement; + processor.emplace( + &*aio, store, s->bucket_info, pdest_placement, s->bucket_owner.get_id(),obj_ctx, obj, + s->req_id, position, &cur_accounted_size, this, s->yield); + } else { + if (s->bucket_info.versioning_enabled()) { + if (!version_id.empty()) { + obj.key.set_instance(version_id); + } else { + store->getRados()->gen_rand_obj_instance_name(&obj); + version_id = obj.key.instance; + } + } + pdest_placement = &s->dest_placement; + processor.emplace( + &*aio, store, s->bucket_info, pdest_placement, + s->bucket_owner.get_id(), obj_ctx, obj, olh_epoch, + s->req_id, this, s->yield); + } + + op_ret = processor->prepare(s->yield, tracer, span); + if (op_ret < 0) { + ldpp_dout(this, 20) << "processor->prepare() returned ret=" << op_ret + << dendl; + return; + } + + if ((! 
copy_source.empty()) && !copy_source_range) { + rgw_obj_key obj_key(copy_source_object_name, copy_source_version_id); + rgw_obj obj(copy_source_bucket_info.bucket, obj_key.name); + + RGWObjState *astate; + op_ret = store->getRados()->get_obj_state(&obj_ctx, copy_source_bucket_info, obj, + &astate, true, s->yield, false); + if (op_ret < 0) { + ldpp_dout(this, 0) << "ERROR: get copy source obj state returned with error" << op_ret << dendl; + return; + } + if (!astate->exists){ + op_ret = -ENOENT; + return; + } + lst = astate->accounted_size - 1; + } else { + lst = copy_source_range_lst; + } + + fst = copy_source_range_fst; + + // no filters by default + DataProcessor *filter = processor.get(); + + const auto& compression_type = store->svc()->zone->get_zone_params().get_compression_type(*pdest_placement); + CompressorRef plugin; + boost::optional compressor; + + std::unique_ptr encrypt; + + if (!append) { // compression and encryption only apply to full object uploads + op_ret = get_encrypt_filter(&encrypt, filter); + if (op_ret < 0) { + return; + } + if (encrypt != nullptr) { + filter = &*encrypt; + } else if (compression_type != "none") { + plugin = get_compressor_plugin(s, compression_type); + if (!plugin) { + ldpp_dout(this, 1) << "Cannot load plugin for compression type " + << compression_type << dendl; + } else { + compressor.emplace(s->cct, plugin, filter); + filter = &*compressor; + } + } + } + tracepoint(rgw_op, before_data_transfer, s->req_id.c_str()); + do { + bufferlist data; + if (fst > lst) + break; + if (copy_source.empty()) { + // len = get_data(data,tracer,span); + len=get_data(data, tracer, span); + } else { + uint64_t cur_lst = min(fst + s->cct->_conf->rgw_max_chunk_size - 1, lst); + op_ret = get_data(fst, cur_lst, data); + if (op_ret < 0) + return; + len = data.length(); + s->content_length += len; + fst += len; + } + if (len < 0) { + op_ret = len; + ldpp_dout(this, 20) << "get_data() returned ret=" << op_ret << dendl; + return; + } else if (len == 
0) { + break; + } + + if (need_calc_md5) { + hash.Update((const unsigned char *)data.c_str(), data.length()); } - } - - if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) { - return -EACCES; - } - return 0; -} + /* update torrrent */ + torrent.update(data, tracer, span); + op_ret = filter->process(std::move(data), ofs, tracer, span); + if (op_ret < 0) { + ldpp_dout(this, 20) << "processor->process() returned ret=" + << op_ret << dendl; + return; + } -void RGWPutObj::pre_exec() -{ - rgw_bucket_object_pre_exec(s); -} + ofs += len; + } while (len > 0); + tracepoint(rgw_op, after_data_transfer, s->req_id.c_str(), ofs); -class RGWPutObj_CB : public RGWGetObj_Filter -{ - RGWPutObj *op; -public: - explicit RGWPutObj_CB(RGWPutObj *_op) : op(_op) {} - ~RGWPutObj_CB() override {} + // flush any data in filters + op_ret = filter->process({}, ofs); + if (op_ret < 0) { + return; + } - int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) override { - return op->get_data_cb(bl, bl_ofs, bl_len); + if (!chunked_upload && ofs != s->content_length) { + op_ret = -ERR_REQUEST_TIMEOUT; + return; } -}; + s->obj_size = ofs; -int RGWPutObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len) -{ - bufferlist bl_tmp; - bl.begin(bl_ofs).copy(bl_len, bl_tmp); + perfcounter->inc(l_rgw_put_b, s->obj_size); - bl_aux.append(bl_tmp); + op_ret = do_aws4_auth_completion(); + if (op_ret < 0) { + return; + } - return bl_len; -} + op_ret = store->getRados()->check_quota(s->bucket_owner.get_id(), s->bucket, + user_quota, bucket_quota, s->obj_size, tracer, span); + if (op_ret < 0) { + ldpp_dout(this, 20) << "second check_quota() returned op_ret=" << op_ret << dendl; + return; + } -int RGWPutObj::get_data(const off_t fst, const off_t lst, bufferlist& bl) -{ - RGWPutObj_CB cb(this); - RGWGetObj_Filter* filter = &cb; - boost::optional decompress; - std::unique_ptr decrypt; - RGWCompressionInfo cs_info; - map attrs; - map::iterator attr_iter; - int ret = 0; + hash.Final(m); - 
uint64_t obj_size; - int64_t new_ofs, new_end; + if (compressor && compressor->is_compressed()) { + bufferlist tmp; + RGWCompressionInfo cs_info; + cs_info.compression_type = plugin->get_type_name(); + cs_info.orig_size = s->obj_size; + cs_info.blocks = move(compressor->get_compression_blocks()); + encode(cs_info, tmp); + attrs[RGW_ATTR_COMPRESSION] = tmp; + ldpp_dout(this, 20) << "storing " << RGW_ATTR_COMPRESSION + << " with type=" << cs_info.compression_type + << ", orig_size=" << cs_info.orig_size + << ", blocks=" << cs_info.blocks.size() << dendl; + } - new_ofs = fst; - new_end = lst; + buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5, tracer, span); - rgw_obj_key obj_key(copy_source_object_name, copy_source_version_id); - rgw_obj obj(copy_source_bucket_info.bucket, obj_key); + etag = calc_md5; - RGWRados::Object op_target(store->getRados(), copy_source_bucket_info, *static_cast(s->obj_ctx), obj); - RGWRados::Object::Read read_op(&op_target); - read_op.params.obj_size = &obj_size; - read_op.params.attrs = &attrs; + if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) { + op_ret = -ERR_BAD_DIGEST; + return; + } - ret = read_op.prepare(s->yield); - if (ret < 0) - return ret; + policy.encode(aclbl); + emplace_attr(RGW_ATTR_ACL, std::move(aclbl)); - bool need_decompress; - op_ret = rgw_compression_info_from_attrset(attrs, need_decompress, cs_info); - if (op_ret < 0) { - ldpp_dout(s, 0) << "ERROR: failed to decode compression info" << dendl; - return -EIO; + if (dlo_manifest) { + op_ret = encode_dlo_manifest_attr(dlo_manifest, attrs); + if (op_ret < 0) { + ldpp_dout(this, 0) << "bad user manifest: " << dlo_manifest << dendl; + return; + } } - bool partial_content = true; - if (need_decompress) - { - obj_size = cs_info.orig_size; - decompress.emplace(s->cct, &cs_info, partial_content, filter); - filter = &*decompress; + if (slo_info) { + bufferlist manifest_bl; + encode(*slo_info, manifest_bl); + emplace_attr(RGW_ATTR_SLO_MANIFEST, std::move(manifest_bl)); } - 
attr_iter = attrs.find(RGW_ATTR_MANIFEST); - op_ret = this->get_decrypt_filter(&decrypt, - filter, - attrs, - attr_iter != attrs.end() ? &(attr_iter->second) : nullptr); - if (decrypt != nullptr) { - filter = decrypt.get(); + if (supplied_etag && etag.compare(supplied_etag) != 0) { + op_ret = -ERR_UNPROCESSABLE_ENTITY; + return; } + bl.append(etag.c_str(), etag.size()); + emplace_attr(RGW_ATTR_ETAG, std::move(bl)); + + populate_with_generic_attrs(s, attrs); + op_ret = rgw_get_request_metadata(s->cct, s->info, attrs); if (op_ret < 0) { - return ret; + return; } + encode_delete_at_attr(delete_at, attrs, tracer, span); + encode_obj_tags_attr(obj_tags.get(), attrs, tracer, span); + rgw_cond_decode_objtags(s, attrs, tracer, span); - ret = read_op.range_to_ofs(obj_size, new_ofs, new_end); - if (ret < 0) - return ret; - - filter->fixup_range(new_ofs, new_end); - ret = read_op.iterate(new_ofs, new_end, filter, s->yield); - - if (ret >= 0) - ret = filter->flush(); - - bl.claim_append(bl_aux); - - return ret; -} - -// special handling for compression type = "random" with multipart uploads -static CompressorRef get_compressor_plugin(const req_state *s, - const std::string& compression_type) -{ - if (compression_type != "random") { - return Compressor::create(s->cct, compression_type); + /* Add a custom metadata to expose the information whether an object + * is an SLO or not. Appending the attribute must be performed AFTER + * processing any input from user in order to prohibit overwriting. 
*/ + if (slo_info) { + bufferlist slo_userindicator_bl; + slo_userindicator_bl.append("True", 4); + emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl)); + } + if (obj_legal_hold) { + bufferlist obj_legal_hold_bl; + obj_legal_hold->encode(obj_legal_hold_bl); + emplace_attr(RGW_ATTR_OBJECT_LEGAL_HOLD, std::move(obj_legal_hold_bl)); + } + if (obj_retention) { + bufferlist obj_retention_bl; + obj_retention->encode(obj_retention_bl); + emplace_attr(RGW_ATTR_OBJECT_RETENTION, std::move(obj_retention_bl)); } - bool is_multipart{false}; - const auto& upload_id = s->info.args.get("uploadId", &is_multipart); + tracepoint(rgw_op, processor_complete_enter, s->req_id.c_str()); + op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs, + (delete_at ? *delete_at : real_time()), if_match, if_nomatch, + (user_data.empty() ? nullptr : &user_data), nullptr, nullptr, + s->yield, tracer, span); + tracepoint(rgw_op, processor_complete_exit, s->req_id.c_str()); - if (!is_multipart) { - return Compressor::create(s->cct, compression_type); + /* produce torrent */ + if (s->cct->_conf->rgw_torrent_flag && (ofs == torrent.get_data_len())) + { + torrent.init(s, store); + torrent.set_create_date(mtime); + op_ret = torrent.complete(); + if (0 != op_ret) + { + ldpp_dout(this, 0) << "ERROR: torrent.handle_data() returned " << op_ret << dendl; + return; + } } - // use a hash of the multipart upload id so all parts use the same plugin - const auto alg = std::hash{}(upload_id) % Compressor::COMP_ALG_LAST; - if (alg == Compressor::COMP_ALG_NONE) { - return nullptr; + // send request to notification manager + const auto ret = rgw::notify::publish(s, s->object, s->obj_size, mtime, etag, rgw::notify::ObjectCreatedPut, store); + if (ret < 0) { + ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl; + // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed + // this should be 
global conf (probably returnign a different handler) + // so we don't need to read the configured values before we perform it } - return Compressor::create(s->cct, alg); } + + + void RGWPutObj::execute() { char supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1]; @@ -3833,7 +5831,7 @@ void RGWPutObj::execute() MD5 hash; bufferlist bl, aclbl, bs; int len; - + off_t fst; off_t lst; @@ -4378,7 +6376,7 @@ void RGWPostObj::execute() buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); etag = calc_md5; - + if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) { op_ret = -ERR_BAD_DIGEST; return; @@ -4742,31 +6740,92 @@ int RGWDeleteObj::handle_slo_manifest(bufferlist& bl) return -EINVAL; } - RGWBulkDelete::acct_path_t path; - - path.bucket_name = url_decode(path_str.substr(1, sep_pos - 1)); - path.obj_key = url_decode(path_str.substr(sep_pos + 1)); - - items.push_back(path); + RGWBulkDelete::acct_path_t path; + + path.bucket_name = url_decode(path_str.substr(1, sep_pos - 1)); + path.obj_key = url_decode(path_str.substr(sep_pos + 1)); + + items.push_back(path); + } + + /* Request removal of the manifest object itself. */ + RGWBulkDelete::acct_path_t path; + path.bucket_name = s->bucket_name; + path.obj_key = s->object; + items.push_back(path); + + int ret = deleter->delete_chunk(items); + if (ret < 0) { + return ret; + } + + return 0; +} + +int RGWDeleteObj::verify_permission() +{ + int op_ret = get_params(); + if (op_ret) { + return op_ret; + } + if (s->iam_policy || ! 
s->iam_user_policies.empty()) { + if (s->bucket_info.obj_lock_enabled() && bypass_governance_mode) { + auto r = eval_user_policies(s->iam_user_policies, s->env, boost::none, + rgw::IAM::s3BypassGovernanceRetention, ARN(s->bucket, s->object.name)); + if (r == Effect::Deny) { + bypass_perm = false; + } else if (r == Effect::Pass && s->iam_policy) { + r = s->iam_policy->eval(s->env, *s->auth.identity, rgw::IAM::s3BypassGovernanceRetention, + ARN(s->bucket, s->object.name)); + if (r == Effect::Deny) { + bypass_perm = false; + } + } + } + auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env, + boost::none, + s->object.instance.empty() ? + rgw::IAM::s3DeleteObject : + rgw::IAM::s3DeleteObjectVersion, + ARN(s->bucket, s->object.name)); + if (usr_policy_res == Effect::Deny) { + return -EACCES; + } + + rgw::IAM::Effect r = Effect::Pass; + if (s->iam_policy) { + r = s->iam_policy->eval(s->env, *s->auth.identity, + s->object.instance.empty() ? + rgw::IAM::s3DeleteObject : + rgw::IAM::s3DeleteObjectVersion, + ARN(s->bucket, s->object.name)); + } + if (r == Effect::Allow) + return 0; + else if (r == Effect::Deny) + return -EACCES; + else if (usr_policy_res == Effect::Allow) + return 0; } - /* Request removal of the manifest object itself. 
*/ - RGWBulkDelete::acct_path_t path; - path.bucket_name = s->bucket_name; - path.obj_key = s->object; - items.push_back(path); + if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) { + return -EACCES; + } - int ret = deleter->delete_chunk(items); - if (ret < 0) { - return ret; + if (s->bucket_info.mfa_enabled() && + !s->object.instance.empty() && + !s->mfa_verified) { + ldpp_dout(this, 5) << "NOTICE: object delete request with a versioned object, mfa auth not provided" << dendl; + return -ERR_MFA_REQUIRED; } return 0; } -int RGWDeleteObj::verify_permission() +int RGWDeleteObj::verify_permission(Jager_Tracer& tracer, const Span& parent_span) { - int op_ret = get_params(); + Span span = tracer.child_span("rgw_op.cc RGWDeleteObj::verify_permission",parent_span); + int op_ret = get_params(tracer, span); if (op_ret) { return op_ret; } @@ -4810,7 +6869,7 @@ int RGWDeleteObj::verify_permission() return 0; } - if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) { + if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE, tracer, span)) { return -EACCES; } @@ -4829,6 +6888,12 @@ void RGWDeleteObj::pre_exec() rgw_bucket_object_pre_exec(s); } +void RGWDeleteObj::pre_exec(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWDeleteObj::pre_exec",parent_span); + rgw_bucket_object_pre_exec(s, tracer, span); +} + void RGWDeleteObj::execute() { if (!s->bucket_exists) { @@ -4989,6 +7054,167 @@ void RGWDeleteObj::execute() } } +void RGWDeleteObj::execute(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWDeleteObj::execute",parent_span); + if (!s->bucket_exists) { + op_ret = -ERR_NO_SUCH_BUCKET; + return; + } + + rgw_obj obj(s->bucket, s->object); + map attrs; + + bool check_obj_lock = obj.key.have_instance() && s->bucket_info.obj_lock_enabled(); + + if (!s->object.empty()) { + op_ret = get_obj_attrs(store, s, obj, attrs, tracer, span); + + if 
(need_object_expiration() || multipart_delete) { + /* check if obj exists, read orig attrs */ + if (op_ret < 0) { + return; + } + } + + if (check_obj_lock) { + /* check if obj exists, read orig attrs */ + if (op_ret < 0) { + if (op_ret == -ENOENT) { + /* object maybe delete_marker, skip check_obj_lock*/ + check_obj_lock = false; + } else { + return; + } + } + } + + // ignore return value from get_obj_attrs in all other cases + op_ret = 0; + + if (check_obj_lock) { + auto aiter = attrs.find(RGW_ATTR_OBJECT_RETENTION); + if (aiter != attrs.end()) { + RGWObjectRetention obj_retention; + try { + decode(obj_retention, aiter->second); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectRetention" << dendl; + op_ret = -EIO; + return; + } + if (ceph::real_clock::to_time_t(obj_retention.get_retain_until_date()) > ceph_clock_now()) { + if (obj_retention.get_mode().compare("GOVERNANCE") != 0 || !bypass_perm || !bypass_governance_mode) { + op_ret = -EACCES; + return; + } + } + } + aiter = attrs.find(RGW_ATTR_OBJECT_LEGAL_HOLD); + if (aiter != attrs.end()) { + RGWObjectLegalHold obj_legal_hold; + try { + decode(obj_legal_hold, aiter->second); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectLegalHold" << dendl; + op_ret = -EIO; + return; + } + if (obj_legal_hold.is_enabled()) { + op_ret = -EACCES; + return; + } + } + } + + if (multipart_delete) { + const auto slo_attr = attrs.find(RGW_ATTR_SLO_MANIFEST); + + if (slo_attr != attrs.end()) { + op_ret = handle_slo_manifest(slo_attr->second); + if (op_ret < 0) { + ldpp_dout(this, 0) << "ERROR: failed to handle slo manifest ret=" << op_ret << dendl; + } + } else { + op_ret = -ERR_NOT_SLO_MANIFEST; + } + + return; + } + + RGWObjectCtx *obj_ctx = static_cast(s->obj_ctx); + obj_ctx->set_atomic(obj); + + bool ver_restored = false; + op_ret = store->getRados()->swift_versioning_restore(*obj_ctx, s->bucket_owner.get_id(), + s->bucket_info, obj, ver_restored, 
this, tracer, span); + if (op_ret < 0) { + return; + } + + if (!ver_restored) { + /* Swift's versioning mechanism hasn't found any previous version of + * the object that could be restored. This means we should proceed + * with the regular delete path. */ + RGWRados::Object del_target(store->getRados(), s->bucket_info, *obj_ctx, obj); + RGWRados::Object::Delete del_op(&del_target); + + op_ret = get_system_versioning_params(s, &del_op.params.olh_epoch, + &del_op.params.marker_version_id); + if (op_ret < 0) { + return; + } + + del_op.params.bucket_owner = s->bucket_owner.get_id(); + del_op.params.versioning_status = s->bucket_info.versioning_status(); + del_op.params.obj_owner = s->owner; + del_op.params.unmod_since = unmod_since; + del_op.params.high_precision_time = s->system_request; /* system request uses high precision time */ + + op_ret = del_op.delete_obj(s->yield, tracer, span); + if (op_ret >= 0) { + delete_marker = del_op.result.delete_marker; + version_id = del_op.result.version_id; + } + + /* Check whether the object has expired. Swift API documentation + * stands that we should return 404 Not Found in such case. */ + if (need_object_expiration() && object_is_expired(attrs)) { + op_ret = -ENOENT; + return; + } + } + + if (op_ret == -ECANCELED) { + op_ret = 0; + } + if (op_ret == -ERR_PRECONDITION_FAILED && no_precondition_error) { + op_ret = 0; + } + + // cache the objects tags and metadata into the requests + // so it could be used in the notification mechanism + try { + populate_tags_in_request(s, attrs); + } catch (buffer::error& err) { + ldpp_dout(this, 5) << "WARNING: failed to populate delete request with object tags: " << err.what() << dendl; + } + populate_metadata_in_request(s, attrs); + } else { + op_ret = -EINVAL; + } + + const auto ret = rgw::notify::publish(s, s->object, s->obj_size, ceph::real_clock::now(), attrs[RGW_ATTR_ETAG].to_str(), + delete_marker && s->object.instance.empty() ? 
rgw::notify::ObjectRemovedDeleteMarkerCreated : rgw::notify::ObjectRemovedDelete, + store); + if (ret < 0) { + ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl; + // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed + // this should be global conf (probably returnign a different handler) + // so we don't need to read the configured values before we perform it + } +} + bool RGWCopyObj::parse_copy_location(const boost::string_view& url_src, string& bucket_name, rgw_obj_key& key) @@ -5098,7 +7324,7 @@ int RGWCopyObj::verify_permission() return -EACCES; } else if (e == Effect::Pass && !src_acl.verify_permission(this, *s->auth.identity, s->perm_mask, - RGW_PERM_READ)) { + RGW_PERM_READ)) { return -EACCES; } } else if (!src_acl.verify_permission(this, *s->auth.identity, @@ -5562,7 +7788,7 @@ void RGWPutACLs::execute() void RGWPutLC::execute() { bufferlist bl; - + RGWLifecycleConfiguration_S3 config(s->cct); RGWXMLParser parser; RGWLifecycleConfiguration_S3 new_config(s->cct); @@ -5963,7 +8189,7 @@ void RGWInitMultipart::execute() op_ret = obj_op.write_meta(bl.length(), 0, attrs, s->yield); } while (op_ret == -EEXIST); - + const auto ret = rgw::notify::publish(s, s->object, s->obj_size, ceph::real_clock::now(), attrs[RGW_ATTR_ETAG].to_str(), rgw::notify::ObjectCreatedPost, store); if (ret < 0) { ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl; @@ -6187,9 +8413,9 @@ void RGWCompleteMultipart::execute() ldpp_dout(this, 0) << "ERROR: compression type was changed during multipart upload (" << cs_info.compression_type << ">>" << obj_part.cs_info.compression_type << ")" << dendl; op_ret = -ERR_INVALID_PART; - return; + return; } - + if (part_compressed) { int64_t new_ofs; // offset in compression data for new part if (cs_info.blocks.size() > 0) @@ -6203,7 +8429,7 @@ void RGWCompleteMultipart::execute() cb.len = block.len; 
cs_info.blocks.push_back(cb); new_ofs = cb.new_ofs + cb.len; - } + } if (!compressed) cs_info.compression_type = obj_part.cs_info.compression_type; cs_info.orig_size += obj_part.cs_info.orig_size; @@ -6277,7 +8503,7 @@ void RGWCompleteMultipart::execute() } else { ldpp_dout(this, 0) << "WARNING: failed to remove object " << meta_obj << dendl; } - + const auto ret = rgw::notify::publish(s, s->object, s->obj_size, ceph::real_clock::now(), etag, rgw::notify::ObjectCreatedCompleteMultipartUpload, store); if (ret < 0) { ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl; @@ -6639,7 +8865,7 @@ void RGWDeleteMultiObj::execute() bufferlist etag_bl; const auto etag = obj_state->get_attr(RGW_ATTR_ETAG, etag_bl) ? etag_bl.to_str() : ""; - const auto ret = rgw::notify::publish(s, obj.key, obj_state->size, ceph::real_clock::now(), etag, + const auto ret = rgw::notify::publish(s, obj.key, obj_state->size, ceph::real_clock::now(), etag, del_op.result.delete_marker && s->object.instance.empty() ? rgw::notify::ObjectRemovedDeleteMarkerCreated : rgw::notify::ObjectRemovedDelete, store); if (ret < 0) { @@ -7147,7 +9373,7 @@ bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo, return true; } } - + return verify_bucket_permission_no_policy(this, s, s->user_acl.get(), &bacl, RGW_PERM_WRITE); } @@ -7332,7 +9558,7 @@ void RGWBulkUploadOp::execute() return; } - /* Handling the $UPLOAD_PATH accordingly to the Swift's Bulk middleware. See: + /* Handling the $UPLOAD_PATH accordingly to the Swift's Bulk middleware. 
See: * https://github.com/openstack/swift/blob/2.13.0/swift/common/middleware/bulk.py#L31-L41 */ std::string bucket_path, file_prefix; std::tie(bucket_path, file_prefix) = handle_upload_path(s); @@ -7584,6 +9810,17 @@ RGWHandler::~RGWHandler() { } +int RGWHandler::init(rgw::sal::RGWRadosStore *_store, + struct req_state *_s, + rgw::io::BasicClient *cio,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWHandler::init()",parent_span); + store = _store; + s = _s; + + return 0; +} + int RGWHandler::init(rgw::sal::RGWRadosStore *_store, struct req_state *_s, rgw::io::BasicClient *cio) @@ -7594,6 +9831,21 @@ int RGWHandler::init(rgw::sal::RGWRadosStore *_store, return 0; } +int RGWHandler::do_init_permissions(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_op.cc RGWHandler::do_init_permissions()",parent_span); + int ret = rgw_build_bucket_policies(store, s, tracer, span); + if (ret < 0) { + ldpp_dout(s, 10) << "init_permissions on " << s->bucket + << " failed, ret=" << ret << dendl; + return ret==-ENODATA ? 
-EACCES : ret; + } + + rgw_build_iam_environment(store, s); + return ret; +} + + int RGWHandler::do_init_permissions() { int ret = rgw_build_bucket_policies(store, s); @@ -7626,6 +9878,26 @@ int RGWHandler::do_read_permissions(RGWOp *op, bool only_bucket) return ret; } +int RGWHandler::do_read_permissions(RGWOp *op, bool only_bucket, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.cc RGWHandler::do_read_permissions", parent_span); + if (only_bucket) { + /* already read bucket info */ + return 0; + } + int ret = rgw_build_object_policies(store, s, op->prefetch_data(), tracer, span); + + if (ret < 0) { + ldpp_dout(op, 10) << "read_permissions on " << s->bucket << ":" + << s->object << " only_bucket=" << only_bucket + << " ret=" << ret << dendl; + if (ret == -ENODATA) + ret = -EACCES; + } + + return ret; +} + int RGWOp::error_handler(int err_no, string *error_content) { return dialect_handler->error_handler(err_no, error_content); } @@ -7756,7 +10028,7 @@ void RGWGetBucketPolicy::execute() s->err.message = "The bucket policy does not exist"; return; } - } + } } void RGWDeleteBucketPolicy::send_response() diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 906b26d236996..07bbf0a09d596 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -56,6 +56,8 @@ #include "services/svc_tier_rados.h" #include "include/ceph_assert.h" +#include "include/tracer.h" + using ceph::crypto::SHA1; @@ -79,23 +81,37 @@ int rgw_op_get_bucket_policy_from_attr(CephContext *cct, RGWBucketInfo& bucket_info, map& bucket_attrs, RGWAccessControlPolicy *policy); +int rgw_op_get_bucket_policy_from_attr(CephContext *cct, + rgw::sal::RGWRadosStore *store, + RGWBucketInfo& bucket_info, + map& bucket_attrs, + RGWAccessControlPolicy *policy, Jager_Tracer&, const Span&); class RGWHandler { protected: rgw::sal::RGWRadosStore* store{nullptr}; struct req_state *s{nullptr}; - + int do_init_permissions(Jager_Tracer&,const Span&); int do_init_permissions(); int 
do_read_permissions(RGWOp* op, bool only_bucket); + int do_read_permissions(RGWOp* op, bool only_bucket, Jager_Tracer&, const Span&); public: RGWHandler() {} virtual ~RGWHandler(); + virtual int init(rgw::sal::RGWRadosStore* store, + struct req_state* _s, + rgw::io::BasicClient* cio,Jager_Tracer&,const Span&); + virtual int init(rgw::sal::RGWRadosStore* store, struct req_state* _s, rgw::io::BasicClient* cio); + virtual int init_permissions(RGWOp*,Jager_Tracer&,const Span&) { + return 0; + } + virtual int init_permissions(RGWOp*) { return 0; } @@ -104,7 +120,7 @@ class RGWHandler { *new_op = op; return 0; } - + virtual int read_permissions(RGWOp* op,Jager_Tracer&,const Span&) {return 0;} virtual int read_permissions(RGWOp* op) = 0; virtual int authorize(const DoutPrefixProvider* dpp) = 0; virtual int postauth_init() = 0; @@ -119,6 +135,7 @@ class RGWHandler { void rgw_bucket_object_pre_exec(struct req_state *s); +void rgw_bucket_object_pre_exec(struct req_state *s, Jager_Tracer&, const Span&); namespace dmc = rgw::dmclock; @@ -138,6 +155,7 @@ class RGWOp : public DoutPrefixProvider { int do_aws4_auth_completion(); virtual int init_quota(); + virtual int init_quota(Jager_Tracer&,const Span&); public: RGWOp() @@ -152,7 +170,28 @@ class RGWOp : public DoutPrefixProvider { int get_ret() const { return op_ret; } + virtual int init_processing(Jager_Tracer& tracer,const Span& parent_span) { + Span span=tracer.child_span("rgw_op.h RGWOp::init_processing()",parent_span); + if (dialect_handler->supports_quota()) { + op_ret = init_quota(tracer,span); + if (op_ret < 0) + return op_ret; + } + + return 0; + } + virtual int init_processing() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_op.h init_processing", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_op.h init_processing"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if 
(dialect_handler->supports_quota()) { op_ret = init_quota(); if (op_ret < 0) @@ -187,13 +226,23 @@ class RGWOp : public DoutPrefixProvider { return dialect_handler->authorize(this); } virtual int verify_permission() = 0; + /* Traced overloads default to the untraced virtuals, so an op that does not override them still runs and returns a defined value. */ virtual int verify_permission(Jager_Tracer&,const Span&) { return verify_permission(); } virtual int verify_op_mask(); + virtual int verify_op_mask(Jager_Tracer&,const Span&); virtual void pre_exec() {} + virtual void pre_exec(Jager_Tracer&,const Span&) { pre_exec(); } virtual void execute() = 0; + virtual void execute(Jager_Tracer&,const Span&) { execute(); } virtual void send_response() {} + virtual void send_response(Jager_Tracer& tracer, const Span& parent_span) { send_response(); } virtual void complete() { send_response(); } + + virtual void complete(Jager_Tracer& tracer, const Span& parent_span) { + send_response(tracer, parent_span); + } + virtual const char* name() const = 0; virtual RGWOpType get_type() { return RGW_OP_UNKNOWN; } @@ -301,6 +350,7 @@ class RGWGetObj : public RGWOp { bool get_legal_hold; int init_common(); + int init_common(Jager_Tracer&,const Span&); public: RGWGetObj() { range_str = NULL; @@ -338,8 +388,11 @@ class RGWGetObj : public RGWOp { } int verify_permission() override; + int verify_permission(Jager_Tracer&,const Span&) override; void pre_exec() override; + void pre_exec(Jager_Tracer&,const Span&) override; void execute() override; + void execute(Jager_Tracer&,const Span&) override; int parse_range(); int read_user_manifest_part( rgw_bucket& bucket, @@ -353,10 +406,14 @@ class RGWGetObj : public RGWOp { int handle_slo_manifest(bufferlist& bl); int get_data_cb(bufferlist& bl, off_t ofs, off_t len); + int get_data_cb(bufferlist& bl, off_t ofs, off_t len, const Span&); virtual int get_params() = 0; + virtual int get_params(Jager_Tracer&, const Span&) = 0; virtual int send_response_data_error() = 0; + virtual int send_response_data_error(const Span&) = 0; virtual int send_response_data(bufferlist& bl, off_t ofs,
off_t len, const Span&) = 0; const char* name() const override { return "get_obj"; } RGWOpType get_type() override { return RGW_OP_GET_OBJ; } @@ -621,7 +678,7 @@ class RGWBulkUploadOp : public RGWOp { boost::optional> parse_path(const boost::string_ref& path); - + std::pair handle_upload_path(struct req_state *s); @@ -754,9 +811,12 @@ class RGWListBuckets : public RGWOp { } int verify_permission() override; + int verify_permission(Jager_Tracer&, const Span&) override; + void execute(Jager_Tracer&,const Span&) override; void execute() override; virtual int get_params() = 0; + virtual int get_params(Jager_Tracer&, const Span&) = 0; virtual void handle_listing_chunk(rgw::sal::RGWBucketList&& buckets) { /* The default implementation, used by e.g. S3, just generates a new * part of listing and sends it client immediately. Swift can behave @@ -764,9 +824,19 @@ class RGWListBuckets : public RGWOp { * instances of RGWBucketList are buffered and finally reversed. */ return send_response_data(buckets); } + virtual void handle_listing_chunk(rgw::sal::RGWBucketList&& buckets, Jager_Tracer& tracer, const Span& parent_span) { + /* The default implementation, used by e.g. S3, just generates a new + * part of listing and sends it client immediately. Swift can behave + * differently: when the reverse option is requested, all incoming + * instances of RGWBucketList are buffered and finally reversed. 
*/ + return send_response_data(buckets, tracer, parent_span); + } virtual void send_response_begin(bool has_buckets) = 0; + virtual void send_response_begin(bool has_buckets, Jager_Tracer&, const Span&, Span&) = 0; virtual void send_response_data(rgw::sal::RGWBucketList& buckets) = 0; + virtual void send_response_data(rgw::sal::RGWBucketList& buckets, Jager_Tracer&, const Span&) = 0; virtual void send_response_end() = 0; + virtual void send_response_end(Span, const Span&) = 0; void send_response() override {} virtual bool should_get_stats() { return false; } @@ -827,8 +897,8 @@ class RGWListBucket : public RGWOp { protected: rgw::sal::RGWBucket* bucket; string prefix; - rgw_obj_key marker; - rgw_obj_key next_marker; + rgw_obj_key marker; + rgw_obj_key next_marker; rgw_obj_key end_marker; string max_keys; string delimiter; @@ -845,6 +915,7 @@ class RGWListBucket : public RGWOp { int shard_id; int parse_max_keys(); + int parse_max_keys(Jager_Tracer&, const Span&); public: RGWListBucket() : bucket(nullptr), list_versions(false), max(0), @@ -852,15 +923,19 @@ class RGWListBucket : public RGWOp { allow_unordered(false), shard_id(-1) {} ~RGWListBucket() { delete bucket; } int verify_permission() override; + int verify_permission(Jager_Tracer&,const Span&) override; void pre_exec() override; + void pre_exec(Jager_Tracer&,const Span&) override; void execute() override; - + void execute(Jager_Tracer&,const Span&) override; void init(rgw::sal::RGWRadosStore *store, struct req_state *s, RGWHandler *h) override { RGWOp::init(store, s, h); bucket = new rgw::sal::RGWRadosBucket(store, *s->user, s->bucket); } virtual int get_params() = 0; + virtual int get_params(Jager_Tracer&, const Span& parent_span) = 0; void send_response() override = 0; + void send_response(Jager_Tracer&, const Span&) override = 0; const char* name() const override { return "list_bucket"; } RGWOpType get_type() override { return RGW_OP_LIST_BUCKET; } uint32_t op_mask() override { return RGW_OP_TYPE_READ; } 
@@ -1030,8 +1105,14 @@ class RGWCreateBucket : public RGWOp { } int verify_permission() override; + int verify_permission(Jager_Tracer&,const Span&); + void pre_exec() override; + void pre_exec(Jager_Tracer&,const Span&); + void execute() override; + void execute(Jager_Tracer&,const Span&); + void init(rgw::sal::RGWRadosStore *store, struct req_state *s, RGWHandler *h) override { RGWOp::init(store, s, h); policy.set_ctx(s->cct); @@ -1039,7 +1120,9 @@ class RGWCreateBucket : public RGWOp { s->cct->_conf.get_val("rgw_relaxed_region_enforcement"); } virtual int get_params() { return 0; } + virtual int get_params(Jager_Tracer& tracer, const Span& parent_span) { return 0; } void send_response() override = 0; + void send_response(Jager_Tracer&, const Span&) override {} const char* name() const override { return "create_bucket"; } RGWOpType get_type() override { return RGW_OP_CREATE_BUCKET; } uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } @@ -1053,10 +1136,14 @@ class RGWDeleteBucket : public RGWOp { RGWDeleteBucket() {} int verify_permission() override; + int verify_permission(Jager_Tracer&, const Span&) override; void pre_exec() override; + void pre_exec(Jager_Tracer&, const Span&) override; void execute() override; + void execute(Jager_Tracer&, const Span&) override; void send_response() override = 0; + void send_response(Jager_Tracer&, const Span&) override = 0; const char* name() const override { return "delete_bucket"; } RGWOpType get_type() override { return RGW_OP_DELETE_BUCKET; } uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; } @@ -1195,8 +1282,13 @@ class RGWPutObj : public RGWOp { } int verify_permission() override; + int verify_permission(Jager_Tracer&,const Span&) override; + void pre_exec() override; + void pre_exec(Jager_Tracer&,const Span&) override; + void execute() override; + void execute(Jager_Tracer&,const Span&) override; /* this is for cases when copying data from other object */ virtual int get_decrypt_filter(std::unique_ptr* 
filter, @@ -1215,8 +1307,11 @@ class RGWPutObj : public RGWOp { int get_data(const off_t fst, const off_t lst, bufferlist& bl); virtual int get_params() = 0; + /* Traced overloads default to the untraced ones; an empty body in an int-returning function is undefined behaviour. */ virtual int get_params(Jager_Tracer&, const Span&) { return get_params(); } virtual int get_data(bufferlist& bl) = 0; + virtual int get_data(bufferlist& bl,Jager_Tracer&,const Span&) { return get_data(bl); } void send_response() override = 0; + void send_response(Jager_Tracer&, const Span&) override { send_response(); } const char* name() const override { return "put_obj"; } RGWOpType get_type() override { return RGW_OP_PUT_OBJ; } uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } @@ -1400,12 +1495,17 @@ class RGWDeleteObj : public RGWOp { } int verify_permission() override; + int verify_permission(Jager_Tracer&, const Span&) override; void pre_exec() override; + void pre_exec(Jager_Tracer&, const Span&) override; void execute() override; + void execute(Jager_Tracer&, const Span&) override; int handle_slo_manifest(bufferlist& bl); virtual int get_params() { return 0; } + virtual int get_params(Jager_Tracer& tracer, const Span& parent_span) { return 0; } void send_response() override = 0; + void send_response(Jager_Tracer&, const Span&) override = 0; const char* name() const override { return "delete_obj"; } RGWOpType get_type() override { return RGW_OP_DELETE_OBJ; } uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; } @@ -1547,7 +1647,7 @@ class RGWPutACLs : public RGWOp { class RGWGetLC : public RGWOp { protected: - + public: RGWGetLC() { } ~RGWGetLC() override { } @@ -1834,8 +1934,8 @@ struct RGWMultipartUploadEntry { class RGWListBucketMultiparts : public RGWOp { protected: string prefix; - RGWMPObj marker; - RGWMultipartUploadEntry next_marker; + RGWMPObj marker; + RGWMultipartUploadEntry next_marker; int max_uploads; string delimiter; vector uploads; @@ -1956,6 +2056,7 @@ class RGWInfo: public RGWOp { }; extern int rgw_build_bucket_policies(rgw::sal::RGWRadosStore* store, struct req_state* s); +extern int
rgw_build_bucket_policies(rgw::sal::RGWRadosStore* store, struct req_state* s,Jager_Tracer&,const Span&); extern int rgw_build_object_policies(rgw::sal::RGWRadosStore *store, struct req_state *s, bool prefetch_data); extern void rgw_build_iam_environment(rgw::sal::RGWRadosStore* store, @@ -2086,18 +2187,35 @@ static inline int rgw_get_request_metadata(CephContext* const cct, return 0; } /* rgw_get_request_metadata */ +static inline int rgw_get_request_metadata(CephContext* const cct, + struct req_info& info, + std::map& attrs, + Jager_Tracer& tracer, const Span& parent_span, + const bool allow_empty_attrs = true) +{ + Span span = tracer.child_span("rgw_op.h rgw_get_request_metadata", parent_span); + return rgw_get_request_metadata(cct, info, attrs, allow_empty_attrs); +} + static inline void encode_delete_at_attr(boost::optional delete_at, map& attrs) { if (delete_at == boost::none) { return; - } + } bufferlist delatbl; encode(*delete_at, delatbl); attrs[RGW_ATTR_DELETE_AT] = delatbl; } /* encode_delete_at_attr */ +static inline void encode_delete_at_attr(boost::optional delete_at, + map& attrs, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.h encode_delete_at_attr", parent_span); + encode_delete_at_attr(delete_at, attrs); +} + static inline void encode_obj_tags_attr(RGWObjTags* obj_tags, map& attrs) { if (obj_tags == nullptr){ @@ -2112,6 +2230,12 @@ static inline void encode_obj_tags_attr(RGWObjTags* obj_tags, map& attrs, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_op.h encode_obj_tags_attr", parent_span); + encode_obj_tags_attr(obj_tags, attrs); +} + static inline int encode_dlo_manifest_attr(const char * const dlo_manifest, map& attrs) { diff --git a/src/rgw/rgw_process.cc b/src/rgw/rgw_process.cc index e314e8b2fb9c5..5f6eb3c7f58aa 100644 --- a/src/rgw/rgw_process.cc +++ b/src/rgw/rgw_process.cc @@ -5,6 +5,7 @@ #include "common/Throttle.h" #include "common/WorkQueue.h" #include 
"include/scope_guard.h" +#include "include/tracer.h" #include "rgw_rados.h" #include "rgw_dmclock_scheduler.h" @@ -40,6 +41,26 @@ void RGWProcess::RGWWQ::_dump_queue() } } /* RGWProcess::RGWWQ::_dump_queue */ + +auto schedule_request(Scheduler *scheduler, req_state *s, RGWOp *op,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_process.cc schedule_request()",parent_span); + using rgw::dmclock::SchedulerCompleter; + if (!scheduler) + return std::make_pair(0,SchedulerCompleter{}); + + const auto client = op->dmclock_client(); + const auto cost = op->dmclock_cost(); + ldpp_dout(op,10) << "scheduling with dmclock client=" << static_cast(client) + << " cost=" << cost << dendl; + return scheduler->schedule_request(client, {}, + req_state::Clock::to_double(s->time), + cost, + s->yield); +} + + + auto schedule_request(Scheduler *scheduler, req_state *s, RGWOp *op) { using rgw::dmclock::SchedulerCompleter; @@ -82,12 +103,152 @@ void RGWProcess::RGWWQ::_process(RGWRequest *req, ThreadPool::TPHandle &) { perfcounter->inc(l_rgw_qactive, -1); } +int rgw_process_authenticated(RGWHandler_REST * const handler, + RGWOp *& op, + RGWRequest * const req, + req_state * const s, + Jager_Tracer& tracer,const Span& parent_span, + const bool skip_retarget) +{ + bool should_trace=true; + Span span; + RGWOpType type=op->get_type(); + if(type == 0) + should_trace=false; + + if(should_trace){ + span=tracer.child_span(RGWOpTypeMapper[type], parent_span); + span->SetTag("operation_success", true); + span->SetTag("operation_type", RGWOpTypeMapper[type]); + } + + ldpp_dout(op, 2) << "init permissions" << dendl; + int ret; + if(should_trace) + ret = handler->init_permissions(op,tracer,span); + else + ret = handler->init_permissions(op); + if (ret < 0) { + return ret; + } + + /** + * Only some accesses support website mode, and website mode does NOT apply + * if you are using the REST endpoint either (ergo, no authenticated access) + */ + if (! 
skip_retarget) { + ldpp_dout(op, 2) << "recalculating target" << dendl; + ret = handler->retarget(op, &op); + if (ret < 0) { + return ret; + } + req->op = op; + } else { + ldpp_dout(op, 2) << "retargeting skipped because of SubOp mode" << dendl; + } + + /* If necessary extract object ACL and put them into req_state. */ + ldpp_dout(op, 2) << "reading permissions" << dendl; + if(should_trace) + ret = handler->read_permissions(op,tracer,span); + else + ret = handler->read_permissions(op); + if (ret < 0) { + return ret; + } + + ldpp_dout(op, 2) << "init op" << dendl; + if(should_trace) + ret = op->init_processing(tracer,span); + else + ret = op->init_processing(); + if (ret < 0) { + return ret; + } + + ldpp_dout(op, 2) << "verifying op mask" << dendl; + if(should_trace) + ret = op->verify_op_mask(tracer,span); + else + ret = op->verify_op_mask(); + if (ret < 0) { + return ret; + } + + /* Check if OPA is used to authorize requests */ + if (s->cct->_conf->rgw_use_opa_authz) { + ret = rgw_opa_authorize(op, s); + if (ret < 0) { + return ret; + } + } + + ldpp_dout(op, 2) << "verifying op permissions" << dendl; + if(should_trace) + ret = op->verify_permission(tracer,span); + else + ret = op->verify_permission(); + if (ret < 0) { + if (s->system_request) { + dout(2) << "overriding permissions due to system operation" << dendl; + } else if (s->auth.identity->is_admin_of(s->user->get_id())) { + dout(2) << "overriding permissions due to admin operation" << dendl; + } else { + return ret; + } + } + + ldpp_dout(op, 2) << "verifying op params" << dendl; + ret = op->verify_params(); + if (ret < 0) { + return ret; + } + + ldpp_dout(op, 2) << "pre-executing" << dendl; + if(should_trace) + op->pre_exec(tracer, span); + else + op->pre_exec(); + + ldpp_dout(op, 2) << "executing" << dendl; + if(should_trace) + op->execute(tracer, span); + else + op->execute(); + + ldpp_dout(op, 2) << "completing" << dendl; + if(should_trace) + op->complete(tracer, span); + else + op->complete(); + + 
return 0; +} + + int rgw_process_authenticated(RGWHandler_REST * const handler, RGWOp *& op, RGWRequest * const req, req_state * const s, const bool skip_retarget) { + RGWOpType type=op->get_type(); + span_structure ss; + // if(s->stack_span.empty()){ + // Span span = tracer_2.new_span(RGWOpTypeMapper[type]); + // ss.set_req_state(s); + // ss.set_span(span); + // } + // else{ + if(type>0){ + Span span = tracer_2.new_span(RGWOpTypeMapper[type]); + // if(type>0){ + span->SetTag("operation_success", true); + span->SetTag("operation_type", RGWOpTypeMapper[type]); + // } + ss.set_req_state(global_state); + ss.set_span(span); + } ldpp_dout(op, 2) << "init permissions" << dendl; int ret = handler->init_permissions(op); if (ret < 0) { @@ -174,7 +335,8 @@ int process_request(rgw::sal::RGWRadosStore* const store, RGWRestfulIO* const client_io, OpsLogSocket* const olog, optional_yield yield, - rgw::dmclock::Scheduler *scheduler, + Jager_Tracer& tracer,const Span& parent_span, + rgw::dmclock::Scheduler *scheduler, int* http_ret) { int ret = client_io->init(g_ceph_context); @@ -189,7 +351,8 @@ int process_request(rgw::sal::RGWRadosStore* const store, struct req_state rstate(g_ceph_context, &rgw_env, &user, req->id); struct req_state *s = &rstate; - + // global_state = nullptr; + global_state = s; RGWObjectCtx rados_ctx(store, s); s->obj_ctx = &rados_ctx; @@ -213,10 +376,17 @@ int process_request(rgw::sal::RGWRadosStore* const store, int init_error = 0; bool should_log = false; RGWRESTMgr *mgr; + // #ifdef WITH_JAEGER + // RGWHandler_REST *handler = rest->get_handler(store, s, + // auth_registry, + // frontend_prefix, + // client_io, &mgr, &init_error,tracer,parent_span); + // #else RGWHandler_REST *handler = rest->get_handler(store, s, auth_registry, frontend_prefix, client_io, &mgr, &init_error); + // #endif rgw::dmclock::SchedulerCompleter c; if (init_error != 0) { abort_early(s, nullptr, init_error, nullptr); @@ -232,7 +402,11 @@ int process_request(rgw::sal::RGWRadosStore* 
const store, abort_early(s, NULL, -ERR_METHOD_NOT_ALLOWED, handler); goto done; } - std::tie(ret,c) = schedule_request(scheduler, s, op); + // #ifdef WITH_JAEGER + // std::tie(ret,c) = schedule_request(scheduler, s, op,tracer,parent_span); + // #else + std::tie(ret,c) = schedule_request(scheduler, s, op); + // #endif if (ret < 0) { if (ret == -EAGAIN) { ret = -ERR_RATE_LIMITED; @@ -274,8 +448,11 @@ int process_request(rgw::sal::RGWRadosStore* const store, abort_early(s, op, -ERR_USER_SUSPENDED, handler); goto done; } - - ret = rgw_process_authenticated(handler, op, req, s); + // #ifdef WITH_JAEGER + // ret = rgw_process_authenticated(handler, op, req, s, tracer, parent_span); + // #else + ret = rgw_process_authenticated(handler, op, req, s); + // #endif if (ret < 0) { abort_early(s, op, ret, handler); goto done; diff --git a/src/rgw/rgw_process.h b/src/rgw/rgw_process.h index 124c2bd1f769c..947e37fb09db3 100644 --- a/src/rgw/rgw_process.h +++ b/src/rgw/rgw_process.h @@ -104,7 +104,7 @@ class RGWProcess { req_wq(this, g_conf()->rgw_op_thread_timeout, g_conf()->rgw_op_thread_suicide_timeout, &m_tp) { } - + virtual ~RGWProcess() = default; virtual void run() = 0; @@ -183,6 +183,24 @@ extern int process_request(rgw::sal::RGWRadosStore* store, optional_yield y, rgw::dmclock::Scheduler *scheduler, int* http_ret = nullptr); +extern int process_request(rgw::sal::RGWRadosStore* store, + RGWREST* rest, + RGWRequest* req, + const std::string& frontend_prefix, + const rgw_auth_registry_t& auth_registry, + RGWRestfulIO* client_io, + OpsLogSocket* olog, + optional_yield y, + Jager_Tracer&,const Span&, + rgw::dmclock::Scheduler *scheduler, + int* http_ret = nullptr); + +extern int rgw_process_authenticated(RGWHandler_REST* handler, + RGWOp*& op, + RGWRequest* req, + req_state* s, + Jager_Tracer&,const Span&, + bool skip_retarget = false); extern int rgw_process_authenticated(RGWHandler_REST* handler, RGWOp*& op, diff --git a/src/rgw/rgw_putobj.h b/src/rgw/rgw_putobj.h index 
e3d9271548301..9ad0f471e4d53 100644 --- a/src/rgw/rgw_putobj.h +++ b/src/rgw/rgw_putobj.h @@ -16,6 +16,7 @@ #pragma once #include "include/buffer.h" +#include "include/tracer.h" namespace rgw::putobj { @@ -28,6 +29,7 @@ class DataProcessor { // empty bufferlist is given to request that any buffered data be flushed, // though this doesn't wait for completions virtual int process(bufferlist&& data, uint64_t offset) = 0; + virtual int process(bufferlist&& data, uint64_t offset, Jager_Tracer& tracer, const Span&) { return process(std::move(data), offset); /* default: ignore the tracing arguments and forward to the untraced overload instead of falling off the end of an int function */ } }; // for composing data processors into a pipeline diff --git a/src/rgw/rgw_putobj_processor.cc b/src/rgw/rgw_putobj_processor.cc index dedb3002bb2f6..d77eb92d51b90 100644 --- a/src/rgw/rgw_putobj_processor.cc +++ b/src/rgw/rgw_putobj_processor.cc @@ -59,6 +59,11 @@ int HeadObjectProcessor::process(bufferlist&& data, uint64_t logical_offset) return processor->process(std::move(data), write_offset); } +int HeadObjectProcessor::process(bufferlist&& data, uint64_t logical_offset, Jager_Tracer& tracer, const Span& parent_span){ + Span span = tracer.child_span("rgw_putobj_processor.cc HeadObjectProcessor::process", parent_span); + /* data is a named rvalue reference (an lvalue here): pass it on with std::move rather than copy-constructing a second bufferlist first */ + return HeadObjectProcessor::process(std::move(data), logical_offset); +} static int process_completed(const AioResultList& completed, RawObjSet *written) { @@ -79,6 +84,12 @@ int RadosWriter::set_stripe_obj(const rgw_raw_obj& raw_obj) return stripe_obj.open(); } +int RadosWriter::set_stripe_obj(const rgw_raw_obj& raw_obj, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_putobj_processor.cc RadosWriter::set_stripe_obj", parent_span); /* traced wrapper: opens a child span, then delegates to the untraced overload */ + return RadosWriter::set_stripe_obj(raw_obj); +} + int RadosWriter::process(bufferlist&& bl, uint64_t offset) { bufferlist data = std::move(bl); @@ -271,6 +282,78 @@ int AtomicObjectProcessor::prepare(optional_yield y) return 0; } +int AtomicObjectProcessor::prepare(optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span
span = tracer.child_span("rgw_putobj_processor.cc AtomicObjectProcessor::prepare", parent_span); + uint64_t max_head_chunk_size; + uint64_t head_max_size; + uint64_t chunk_size = 0; + uint64_t alignment; + rgw_pool head_pool; + + if (!store->getRados()->get_obj_data_pool(bucket_info.placement_rule, head_obj, &head_pool, tracer, span)) { + return -EIO; + } + + int r = store->getRados()->get_max_chunk_size(head_pool, &max_head_chunk_size, tracer, span, &alignment); + if (r < 0) { + return r; + } + + bool same_pool = true; + + if (bucket_info.placement_rule != tail_placement_rule) { + rgw_pool tail_pool; + if (!store->getRados()->get_obj_data_pool(tail_placement_rule, head_obj, &tail_pool)) { + return -EIO; + } + + if (tail_pool != head_pool) { + same_pool = false; + + r = store->getRados()->get_max_chunk_size(tail_pool, &chunk_size); + if (r < 0) { + return r; + } + + head_max_size = 0; + } + } + + if (same_pool) { + head_max_size = max_head_chunk_size; + chunk_size = max_head_chunk_size; + } + + uint64_t stripe_size; + const uint64_t default_stripe_size = store->ctx()->_conf->rgw_obj_stripe_size; + + store->getRados()->get_max_aligned_size(default_stripe_size, alignment, &stripe_size, tracer, span); + + manifest.set_trivial_rule(head_max_size, stripe_size); + + r = manifest_gen.create_begin(store->ctx(), &manifest, + bucket_info.placement_rule, + &tail_placement_rule, + head_obj.bucket, head_obj, tracer, span); + if (r < 0) { + return r; + } + + rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store->getRados()); + + r = writer.set_stripe_obj(stripe_obj, tracer, span); + if (r < 0) { + return r; + } + + set_head_chunk_size(head_max_size); + // initialize the processors + chunk = ChunkProcessor(&writer, chunk_size); + stripe = StripeProcessor(&chunk, this, head_max_size); + return 0; +} + int AtomicObjectProcessor::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, @@ -331,6 +414,67 @@ int AtomicObjectProcessor::complete(size_t 
accounted_size, return 0; } +int AtomicObjectProcessor::complete(size_t accounted_size, + const std::string& etag, + ceph::real_time *mtime, + ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, + const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, + bool *pcanceled, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_putobj_processor.cc AtomicObjectProcessor::complete", parent_span); + int r = writer.drain(); + if (r < 0) { + return r; + } + const uint64_t actual_size = get_actual_size(); + r = manifest_gen.create_next(actual_size); + if (r < 0) { + return r; + } + + obj_ctx.set_atomic(head_obj); + + RGWRados::Object op_target(store->getRados(), bucket_info, obj_ctx, head_obj); + + /* some object types shouldn't be versioned, e.g., multipart parts */ + op_target.set_versioning_disabled(!bucket_info.versioning_enabled()); + + RGWRados::Object::Write obj_op(&op_target); + + obj_op.meta.data = &first_chunk; + obj_op.meta.manifest = &manifest; + obj_op.meta.ptag = &unique_tag; /* use req_id as operation tag */ + obj_op.meta.if_match = if_match; + obj_op.meta.if_nomatch = if_nomatch; + obj_op.meta.mtime = mtime; + obj_op.meta.set_mtime = set_mtime; + obj_op.meta.owner = owner; + obj_op.meta.flags = PUT_OBJ_CREATE; + obj_op.meta.olh_epoch = olh_epoch; + obj_op.meta.delete_at = delete_at; + obj_op.meta.user_data = user_data; + obj_op.meta.zones_trace = zones_trace; + obj_op.meta.modify_tail = true; + + r = obj_op.write_meta(actual_size, accounted_size, attrs, y, tracer, span); + if (r < 0) { + return r; + } + if (!obj_op.meta.canceled) { + // on success, clear the set of objects for deletion + writer.clear_written(); + } + if (pcanceled) { + *pcanceled = obj_op.meta.canceled; + } + return 0; +} + int MultipartObjectProcessor::process_first_chunk(bufferlist&& data, DataProcessor **processor) diff --git a/src/rgw/rgw_putobj_processor.h 
b/src/rgw/rgw_putobj_processor.h index 322652ed9a496..f8951b646e1b6 100644 --- a/src/rgw/rgw_putobj_processor.h +++ b/src/rgw/rgw_putobj_processor.h @@ -21,6 +21,7 @@ #include "rgw_rados.h" #include "services/svc_rados.h" #include "services/svc_tier_rados.h" +#include "include/tracer.h" namespace rgw { @@ -33,7 +34,7 @@ class ObjectProcessor : public DataProcessor { public: // prepare to start processing object data virtual int prepare(optional_yield y) = 0; - + virtual int prepare(optional_yield y, Jager_Tracer&, const Span&) { return prepare(y); /* default: ignore the tracing arguments and forward to the untraced overload instead of falling off the end of an int function */ } // complete the operation and make its result visible to clients virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, @@ -43,6 +44,15 @@ class ObjectProcessor : public DataProcessor { const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, optional_yield y) = 0; + +virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y, Jager_Tracer&, const Span&) { return complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, if_match, if_nomatch, user_data, zones_trace, canceled, y); /* default: ignore the tracing arguments and forward to the untraced overload */ } }; // an object processor with special handling for the first chunk of the head. 
@@ -71,6 +81,7 @@ class HeadObjectProcessor : public ObjectProcessor { // cache first chunk for process_first_chunk(), then forward everything else // to the returned processor int process(bufferlist&& data, uint64_t logical_offset) final override; + int process(bufferlist&& data, uint64_t logical_offset, Jager_Tracer&, const Span&) final override; }; @@ -100,7 +111,7 @@ class RadosWriter : public DataProcessor { // change the current stripe object int set_stripe_obj(const rgw_raw_obj& obj); - + int set_stripe_obj(const rgw_raw_obj& obj, Jager_Tracer&, const Span&); // write the data at the given offset of the current stripe object int process(bufferlist&& data, uint64_t stripe_offset) override; @@ -192,6 +203,7 @@ class AtomicObjectProcessor : public ManifestObjectProcessor { // prepare a trivial manifest int prepare(optional_yield y) override; + int prepare(optional_yield y, Jager_Tracer&, const Span&) override; // write the head object atomically in a bucket index transaction int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, @@ -202,6 +214,15 @@ class AtomicObjectProcessor : public ManifestObjectProcessor { rgw_zone_set *zones_trace, bool *canceled, optional_yield y) override; + int complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y, Jager_Tracer&, const Span&) override; + }; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 3f6c32c8d4867..ae38bc827c97e 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -714,6 +714,12 @@ void RGWRados::get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size = size - (size % alignment); } +void RGWRados::get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size, 
Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::get_max_aligned_size", parent_span); + RGWRados::get_max_aligned_size(size, alignment, max_size); +} + int RGWRados::get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment) { uint64_t alignment; @@ -735,6 +741,12 @@ int RGWRados::get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, return 0; } +int RGWRados::get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, Jager_Tracer& tracer, const Span& parent_span, uint64_t *palignment) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::get_max_chunk_size", parent_span); + return RGWRados::get_max_chunk_size(pool, max_chunk_size, palignment); +} + int RGWRados::get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment) { @@ -1998,47 +2010,23 @@ int RGWRados::Bucket::List::list_objects_ordered( return 0; } // list_objects_ordered - -/** - * Get listing of the objects in a bucket and allow the results to be out - * of order. - * - * Even though there are key differences with the ordered counterpart, - * the parameters are the same to maintain some compatability. - * - * max: maximum number of results to return - * bucket: bucket to list contents of - * prefix: only return results that match this prefix - * delim: should not be set; if it is we should have indicated an error - * marker: if filled in, begin the listing with this object. - * end_marker: if filled in, end the listing with this object. - * result: the objects are put in here. - * common_prefixes: this is never filled with an unordered list; the param - * is maintained for compatibility - * is_truncated: if number of objects in the bucket is bigger than max, then - * truncated. 
- */ -int RGWRados::Bucket::List::list_objects_unordered(int64_t max_p, - vector *result, - map *common_prefixes, - bool *is_truncated, - optional_yield y) -{ +int RGWRados::Bucket::List::list_objects_ordered( + int64_t max_p, + vector *result, + map *common_prefixes, + bool *is_truncated, Jager_Tracer& tracer, const Span& parent_span, + optional_yield y) + { RGWRados *store = target->get_store(); CephContext *cct = store->ctx(); int shard_id = target->get_shard_id(); int count = 0; bool truncated = true; - + bool cls_filtered = false; const int64_t max = // protect against memory issues and negative vals std::min(bucket_list_objects_absolute_max, std::max(int64_t(0), max_p)); - - // read a few extra in each call to cls_bucket_list_unordered in - // case some are filtered out due to namespace matching, versioning, - // filtering, etc. - const int64_t max_read_ahead = 100; - const uint32_t read_ahead = uint32_t(max + std::min(max, max_read_ahead)); + int read_ahead = std::max(cct->_conf->rgw_list_bucket_min_readahead, max); result->clear(); @@ -2060,36 +2048,66 @@ int RGWRados::Bucket::List::list_objects_unordered(int64_t max_p, rgw_obj_key prefix_obj(params.prefix); prefix_obj.set_ns(params.ns); string cur_prefix = prefix_obj.get_index_key_name(); + string after_delim_s; /* needed in !params.delim.empty() AND later */ - while (truncated && count <= max) { - std::vector ent_list; - ent_list.reserve(read_ahead); + if (!params.delim.empty()) { + after_delim_s = cls_rgw_after_delim(params.delim); + /* if marker points at a common prefix, fast forward it into its + * upper bound string */ + int delim_pos = cur_marker.name.find(params.delim, cur_prefix.size()); + if (delim_pos >= 0) { + string s = cur_marker.name.substr(0, delim_pos); + s.append(after_delim_s); + cur_marker = s; + } + } - int r = store->cls_bucket_list_unordered(target->get_bucket_info(), - shard_id, - cur_marker, - cur_prefix, - read_ahead, - params.list_versions, - ent_list, - &truncated, - 
&cur_marker, - y); - if (r < 0) - return r; + rgw_obj_index_key prev_marker; + uint16_t attempt = 0; + while (true) { + ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ << + " beginning attempt=" << ++attempt << dendl; - // NB: while regions of ent_list will be sorted, we have no - // guarantee that all items will be sorted since they can cross - // shard boundaries + // this loop is generally expected only to have a single + // iteration; the standard exit is at the bottom of the loop, but + // there's an error condition emergency exit as well - for (auto& entry : ent_list) { + if (attempt > 1 && !(prev_marker < cur_marker)) { + // we've failed to make forward progress + ldout(cct, 0) << "RGWRados::Bucket::List::" << __func__ << + ": ERROR marker failed to make forward progress; attempt=" << attempt << + ", prev_marker=" << prev_marker << + ", cur_marker=" << cur_marker << dendl; + break; + } + prev_marker = cur_marker; + + ent_map_t ent_map; + ent_map.reserve(read_ahead); + int r = store->cls_bucket_list_ordered(target->get_bucket_info(), + shard_id, + cur_marker, + cur_prefix, + params.delim, + read_ahead + 1 - count, + params.list_versions, + attempt, + ent_map, + &truncated, + &cls_filtered, + &cur_marker, + y, tracer, parent_span); + if (r < 0) { + return r; + } + + for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) { + rgw_bucket_dir_entry& entry = eiter->second; rgw_obj_index_key index_key = entry.key; rgw_obj_key obj(index_key); - if (count < max) { - params.marker.set(index_key); - next_marker.set(index_key); - } + ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ << + " considering entry " << entry.key << dendl; /* note that parse_raw_oid() here will not set the correct * object's instance, as rgw_obj_index_key encodes that @@ -2104,66 +2122,348 @@ int RGWRados::Bucket::List::list_objects_unordered(int64_t max_p, continue; } + bool matched_ns = (obj.ns == params.ns); if (!params.list_versions && !entry.is_visible()) { 
continue; } - if (params.enforce_ns && obj.ns != params.ns) { + if (params.enforce_ns && !matched_ns) { + if (!params.ns.empty()) { + /* we've iterated past the namespace we're searching -- done now */ + truncated = false; + goto done; + } + + /* we're not looking at the namespace this object is in, next! */ continue; } if (cur_end_marker_valid && cur_end_marker <= index_key) { - // we're not guaranteed items will come in order, so we have - // to loop through all - continue; + truncated = false; + goto done; } - if (params.filter && !params.filter->filter(obj.name, index_key.name)) + if (count < max) { + params.marker = index_key; + next_marker = index_key; + } + + if (params.filter && + ! params.filter->filter(obj.name, index_key.name)) { continue; + } if (params.prefix.size() && - (0 != obj.name.compare(0, params.prefix.size(), params.prefix))) + 0 != obj.name.compare(0, params.prefix.size(), params.prefix)) { continue; + } + + if (!params.delim.empty()) { + const int delim_pos = obj.name.find(params.delim, params.prefix.size()); + if (delim_pos >= 0) { + // run either the code where delimiter filtering is done a) + // in the OSD/CLS or b) here. 
+ if (cls_filtered) { + // NOTE: this condition is for the newer versions of the + // OSD that does filtering on the CLS side + + // should only find one delimiter at the end if it finds any + // after the prefix + if (delim_pos != + int(obj.name.length() - params.delim.length())) { + ldout(cct, 0) << + "WARNING: found delimiter in place other than the end of " + "the prefix; obj.name=" << obj.name << + ", prefix=" << params.prefix << dendl; + } + if (common_prefixes) { + if (count >= max) { + truncated = true; + goto done; + } + + (*common_prefixes)[obj.name] = true; + count++; + } + + continue; + } else { + // NOTE: this condition is for older versions of the OSD + // that do not filter on the CLS side, so the following code + // must do the filtering; once we reach version 16 of ceph, + // this code can be removed along with the conditional that + // can lead this way + + /* extract key -with trailing delimiter- for CommonPrefix */ + string prefix_key = + obj.name.substr(0, delim_pos + params.delim.length()); + + if (common_prefixes && + common_prefixes->find(prefix_key) == common_prefixes->end()) { + if (count >= max) { + truncated = true; + goto done; + } + next_marker = prefix_key; + (*common_prefixes)[prefix_key] = true; + + count++; + } + + continue; + } // if we're running an older OSD version + } // if a delimiter was found after prefix + } // if a delimiter was passed in if (count >= max) { truncated = true; goto done; } + ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ << + " adding entry " << entry.key << " to result" << dendl; + result->emplace_back(std::move(entry)); count++; - } // for (auto& entry : ent_list) - } // while (truncated && count <= max) + } // eiter for loop -done: - if (is_truncated) - *is_truncated = truncated; + // NOTE: the following conditional is needed by older versions of + // the OSD that don't do delimiter filtering on the CLS side; once + // we reach version 16 of ceph, the following conditional and the + // code 
within can be removed + if (!cls_filtered && !params.delim.empty()) { + int marker_delim_pos = + cur_marker.name.find(params.delim, cur_prefix.size()); + if (marker_delim_pos >= 0) { + std::string skip_after_delim = + cur_marker.name.substr(0, marker_delim_pos); + skip_after_delim.append(after_delim_s); - return 0; -} // list_objects_unordered + ldout(cct, 20) << "skip_after_delim=" << skip_after_delim << dendl; + if (skip_after_delim > cur_marker.name) { + cur_marker = skip_after_delim; + ldout(cct, 20) << "setting cur_marker=" + << cur_marker.name + << "[" << cur_marker.instance << "]" + << dendl; + } + } + } // if older osd didn't do delimiter filtering -/** - * create a rados pool, associated meta info - * returns 0 on success, -ERR# otherwise. - */ -int RGWRados::create_pool(const rgw_pool& pool) -{ - librados::IoCtx io_ctx; - constexpr bool create = true; - return rgw_init_ioctx(get_rados_handle(), pool, io_ctx, create); -} + ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ << + " INFO end of outer loop, truncated=" << truncated << + ", count=" << count << ", attempt=" << attempt << dendl; -void RGWRados::create_bucket_id(string *bucket_id) -{ - uint64_t iid = instance_id(); - uint64_t bid = next_bucket_id(); - char buf[svc.zone->get_zone_params().get_id().size() + 48]; + if (!truncated || count >= (max + 1) / 2) { + // if we finished listing, or if we're returning at least half the + // requested entries, that's enough; S3 and swift protocols allow + // returning fewer than max entries + break; + } else if (attempt > 8 && count >= 1) { + // if we've made at least 8 attempts and we have some, but very + // few, results, return with what we have + break; + } + + ldout(cct, 1) << "RGWRados::Bucket::List::" << __func__ << + " INFO ordered bucket listing requires read #" << (1 + attempt) << + dendl; + } // read attempt loop + +done: + + if (is_truncated) { + *is_truncated = truncated; + } + + return 0; + } + +/** + * Get listing of the objects in a bucket 
and allow the results to be out + * of order. + * + * Even though there are key differences with the ordered counterpart, + * the parameters are the same to maintain some compatability. + * + * max: maximum number of results to return + * bucket: bucket to list contents of + * prefix: only return results that match this prefix + * delim: should not be set; if it is we should have indicated an error + * marker: if filled in, begin the listing with this object. + * end_marker: if filled in, end the listing with this object. + * result: the objects are put in here. + * common_prefixes: this is never filled with an unordered list; the param + * is maintained for compatibility + * is_truncated: if number of objects in the bucket is bigger than max, then + * truncated. + */ +int RGWRados::Bucket::List::list_objects_unordered(int64_t max_p, + vector *result, + map *common_prefixes, + bool *is_truncated, + optional_yield y) +{ + RGWRados *store = target->get_store(); + CephContext *cct = store->ctx(); + int shard_id = target->get_shard_id(); + + int count = 0; + bool truncated = true; + + const int64_t max = // protect against memory issues and negative vals + std::min(bucket_list_objects_absolute_max, std::max(int64_t(0), max_p)); + + // read a few extra in each call to cls_bucket_list_unordered in + // case some are filtered out due to namespace matching, versioning, + // filtering, etc. 
+ const int64_t max_read_ahead = 100; + const uint32_t read_ahead = uint32_t(max + std::min(max, max_read_ahead)); + + result->clear(); + + // use a local marker; either the marker will have a previous entry + // or it will be empty; either way it's OK to copy + rgw_obj_key marker_obj(params.marker.name, + params.marker.instance, + params.marker.ns); + rgw_obj_index_key cur_marker; + marker_obj.get_index_key(&cur_marker); + + rgw_obj_key end_marker_obj(params.end_marker.name, + params.end_marker.instance, + params.end_marker.ns); + rgw_obj_index_key cur_end_marker; + end_marker_obj.get_index_key(&cur_end_marker); + const bool cur_end_marker_valid = !params.end_marker.empty(); + + rgw_obj_key prefix_obj(params.prefix); + prefix_obj.set_ns(params.ns); + string cur_prefix = prefix_obj.get_index_key_name(); + + while (truncated && count <= max) { + std::vector ent_list; + ent_list.reserve(read_ahead); + + int r = store->cls_bucket_list_unordered(target->get_bucket_info(), + shard_id, + cur_marker, + cur_prefix, + read_ahead, + params.list_versions, + ent_list, + &truncated, + &cur_marker, + y); + if (r < 0) + return r; + + // NB: while regions of ent_list will be sorted, we have no + // guarantee that all items will be sorted since they can cross + // shard boundaries + + for (auto& entry : ent_list) { + rgw_obj_index_key index_key = entry.key; + rgw_obj_key obj(index_key); + + if (count < max) { + params.marker.set(index_key); + next_marker.set(index_key); + } + + /* note that parse_raw_oid() here will not set the correct + * object's instance, as rgw_obj_index_key encodes that + * separately. 
We don't need to set the instance because it's + * not needed for the checks here and we end up using the raw + * entry for the return vector + */ + bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj); + if (!valid) { + ldout(cct, 0) << "ERROR: could not parse object name: " << + obj.name << dendl; + continue; + } + + if (!params.list_versions && !entry.is_visible()) { + continue; + } + + if (params.enforce_ns && obj.ns != params.ns) { + continue; + } + + if (cur_end_marker_valid && cur_end_marker <= index_key) { + // we're not guaranteed items will come in order, so we have + // to loop through all + continue; + } + + if (params.filter && !params.filter->filter(obj.name, index_key.name)) + continue; + + if (params.prefix.size() && + (0 != obj.name.compare(0, params.prefix.size(), params.prefix))) + continue; + + if (count >= max) { + truncated = true; + goto done; + } + + result->emplace_back(std::move(entry)); + count++; + } // for (auto& entry : ent_list) + } // while (truncated && count <= max) + +done: + if (is_truncated) + *is_truncated = truncated; + + return 0; +} // list_objects_unordered + +int RGWRados::Bucket::List::list_objects_unordered(int64_t max_p, + vector *result, + map *common_prefixes, + bool *is_truncated, Jager_Tracer& tracer, const Span& parent_span, + optional_yield y) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::Bucket::List::list_objects_unordered", parent_span); + return RGWRados::Bucket::List::list_objects_unordered(max_p, + result, + common_prefixes, + is_truncated, + y); +} + +/** + * create a rados pool, associated meta info + * returns 0 on success, -ERR# otherwise. 
+ */ +int RGWRados::create_pool(const rgw_pool& pool) +{ + librados::IoCtx io_ctx; + constexpr bool create = true; + return rgw_init_ioctx(get_rados_handle(), pool, io_ctx, create); +} + +void RGWRados::create_bucket_id(string *bucket_id) +{ + uint64_t iid = instance_id(); + uint64_t bid = next_bucket_id(); + char buf[svc.zone->get_zone_params().get_id().size() + 48]; snprintf(buf, sizeof(buf), "%s.%" PRIu64 ".%" PRIu64, svc.zone->get_zone_params().get_id().c_str(), iid, bid); *bucket_id = buf; } +void RGWRados::create_bucket_id(string *bucket_id, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::create_bucket_id", parent_span); + RGWRados::create_bucket_id(bucket_id); +} + int RGWRados::create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket, const string& zonegroup_id, const rgw_placement_rule& placement_rule, @@ -2275,42 +2575,174 @@ int RGWRados::create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket, return -ENOENT; } -bool RGWRados::get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool) -{ - return rgw_get_obj_data_pool(svc.zone->get_zonegroup(), svc.zone->get_zone_params(), placement_rule, obj, pool); -} - -bool RGWRados::obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj) +int RGWRados::create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket, + const string& zonegroup_id, + const rgw_placement_rule& placement_rule, + const string& swift_ver_location, + const RGWQuotaInfo * pquota_info, + map& attrs, + RGWBucketInfo& info, + obj_version *pobjv, + obj_version *pep_objv, + real_time creation_time, + rgw_bucket *pmaster_bucket, + uint32_t *pmaster_num_shards, + Jager_Tracer& tracer, const Span& parent_span, + bool exclusive) { - get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); - - return get_obj_data_pool(placement_rule, obj, &raw_obj->pool); -} + Span span = tracer.child_span("rgw_rados.cc 
RGWRados::create_bucket", parent_span); -int RGWRados::get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx) -{ - string oid, key; - get_obj_bucket_and_oid_loc(obj, oid, key); + #define MAX_CREATE_RETRIES 20 /* need to bound retries */ + rgw_placement_rule selected_placement_rule; + RGWZonePlacementInfo rule_info; - rgw_pool pool; - if (!get_obj_data_pool(bucket_info.placement_rule, obj, &pool)) { - ldout(cct, 0) << "ERROR: cannot get data pool for obj=" << obj << ", probably misconfiguration" << dendl; - return -EIO; - } + for (int i = 0; i < MAX_CREATE_RETRIES; i++) { + int ret = 0; + ret = svc.zone->select_bucket_placement(owner, zonegroup_id, placement_rule, + &selected_placement_rule, &rule_info, tracer, span); + if (ret < 0) + return ret; - int r = open_pool_ctx(pool, *ioctx, false); - if (r < 0) { - return r; - } + if (!pmaster_bucket) { + create_bucket_id(&bucket.marker, tracer, span); + bucket.bucket_id = bucket.marker; + } else { + bucket.marker = pmaster_bucket->marker; + bucket.bucket_id = pmaster_bucket->bucket_id; + } - ioctx->locator_set_key(key); + RGWObjVersionTracker& objv_tracker = info.objv_tracker; - return 0; -} + objv_tracker.read_version.clear(); -int RGWRados::get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref) -{ - get_obj_bucket_and_oid_loc(obj, ref->obj.oid, ref->obj.loc); + if (pobjv) { + objv_tracker.write_version = *pobjv; + } else { + objv_tracker.generate_new_write_ver(cct); + } + + info.bucket = bucket; + info.owner = owner.user_id; + info.zonegroup = zonegroup_id; + info.placement_rule = selected_placement_rule; + info.index_type = rule_info.index_type; + info.swift_ver_location = swift_ver_location; + info.swift_versioning = (!swift_ver_location.empty()); + if (pmaster_num_shards) { + info.num_shards = *pmaster_num_shards; + } else { + info.num_shards = bucket_index_max_shards; + } + info.bucket_index_shard_hash_type = RGWBucketInfo::MOD; + 
info.requester_pays = false; + if (real_clock::is_zero(creation_time)) { + info.creation_time = ceph::real_clock::now(); + } else { + info.creation_time = creation_time; + } + if (pquota_info) { + info.quota = *pquota_info; + } + + int r = svc.bi->init_index(info); + if (r < 0) { + return r; + } + + ret = put_linked_bucket_info(info, exclusive, ceph::real_time(), pep_objv, &attrs, true, tracer, span); + if (ret == -ECANCELED) { + ret = -EEXIST; + } + if (ret == -EEXIST) { + /* we need to reread the info and return it, caller will have a use for it */ + RGWBucketInfo orig_info; + r = get_bucket_info(&svc, bucket.tenant, bucket.name, orig_info, NULL, null_yield, NULL); + if (r < 0) { + if (r == -ENOENT) { + continue; + } + ldout(cct, 0) << "get_bucket_info returned " << r << dendl; + return r; + } + + /* only remove it if it's a different bucket instance */ + if (orig_info.bucket.bucket_id != bucket.bucket_id) { + int r = svc.bi->clean_index(info); + if (r < 0) { + ldout(cct, 0) << "WARNING: could not remove bucket index (r=" << r << ")" << dendl; + } + r = ctl.bucket->remove_bucket_instance_info(info.bucket, info, null_yield); + if (r < 0) { + ldout(cct, 0) << "WARNING: " << __func__ << "(): failed to remove bucket instance info: bucket instance=" << info.bucket.get_key() << ": r=" << r << dendl; + /* continue anyway */ + } + } + + info = std::move(orig_info); + /* ret == -EEXIST here */ + } + return ret; + } + + /* this is highly unlikely */ + ldout(cct, 0) << "ERROR: could not create bucket, continuously raced with bucket creation and removal" << dendl; + return -ENOENT; +} + +bool RGWRados::get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool) +{ + return rgw_get_obj_data_pool(svc.zone->get_zonegroup(), svc.zone->get_zone_params(), placement_rule, obj, pool); +} + +bool RGWRados::get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool, Jager_Tracer& tracer, const Span& parent_span) 
+{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::get_obj_data_pool", parent_span); + return RGWRados::get_obj_data_pool(placement_rule, obj, pool); +} + +bool RGWRados::obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj) +{ + get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); + + return get_obj_data_pool(placement_rule, obj, &raw_obj->pool); +} + +bool RGWRados::obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::obj_to_raw", parent_span); + return RGWRados::obj_to_raw(placement_rule, obj, raw_obj); +} + +int RGWRados::get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx) +{ + string oid, key; + get_obj_bucket_and_oid_loc(obj, oid, key); + + rgw_pool pool; + if (!get_obj_data_pool(bucket_info.placement_rule, obj, &pool)) { + ldout(cct, 0) << "ERROR: cannot get data pool for obj=" << obj << ", probably misconfiguration" << dendl; + return -EIO; + } + + int r = open_pool_ctx(pool, *ioctx, false); + if (r < 0) { + return r; + } + + ioctx->locator_set_key(key); + + return 0; +} + +int RGWRados::get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::get_obj_head_ioctx", parent_span); + return RGWRados::get_obj_head_ioctx(bucket_info, obj, ioctx); +} + +int RGWRados::get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref) +{ + get_obj_bucket_and_oid_loc(obj, ref->obj.oid, ref->obj.loc); rgw_pool pool; if (!get_obj_data_pool(bucket_info.placement_rule, obj, &pool)) { @@ -2332,6 +2764,31 @@ int RGWRados::get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& return 0; } +int RGWRados::get_obj_head_ref(const RGWBucketInfo& 
bucket_info, const rgw_obj& obj, rgw_rados_ref *ref, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::get_obj_head_ref", parent_span); + get_obj_bucket_and_oid_loc(obj, ref->obj.oid, ref->obj.loc); + + rgw_pool pool; + if (!get_obj_data_pool(bucket_info.placement_rule, obj, &pool)) { + ldout(cct, 0) << "ERROR: cannot get data pool for obj=" << obj << ", probably misconfiguration" << dendl; + return -EIO; + } + + ref->pool = svc.rados->pool(pool); + + int r = ref->pool.open( tracer, span, RGWSI_RADOS::OpenParams() + .set_mostly_omap(false)); + if (r < 0) { + ldout(cct, 0) << "ERROR: failed opening data pool (pool=" << pool << "); r=" << r << dendl; + return r; + } + + ref->pool.ioctx().locator_set_key(ref->obj.loc); + + return 0; +} + int RGWRados::get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref) { ref->obj = obj; @@ -2826,6 +3283,99 @@ int RGWRados::swift_versioning_copy(RGWObjectCtx& obj_ctx, return r; } +int RGWRados::swift_versioning_copy(RGWObjectCtx& obj_ctx, + const rgw_user& user, + RGWBucketInfo& bucket_info, + rgw_obj& obj, + const DoutPrefixProvider *dpp, + optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::swift_versioning_copy", parent_span); + if (! 
swift_versioning_enabled(bucket_info)) { + return 0; + } + + obj_ctx.set_atomic(obj); + + RGWObjState * state = nullptr; + int r = get_obj_state(&obj_ctx, bucket_info, obj, &state, false, y); + if (r < 0) { + return r; + } + + if (!state->exists) { + return 0; + } + + const string& src_name = obj.get_oid(); + char buf[src_name.size() + 32]; + struct timespec ts = ceph::real_clock::to_timespec(state->mtime); + snprintf(buf, sizeof(buf), "%03x%s/%lld.%06ld", (int)src_name.size(), + src_name.c_str(), (long long)ts.tv_sec, ts.tv_nsec / 1000); + + RGWBucketInfo dest_bucket_info; + + r = get_bucket_info(&svc, bucket_info.bucket.tenant, bucket_info.swift_ver_location, dest_bucket_info, NULL, null_yield, NULL); + if (r < 0) { + ldout(cct, 10) << "failed to read dest bucket info: r=" << r << dendl; + if (r == -ENOENT) { + return -ERR_PRECONDITION_FAILED; + } + return r; + } + + if (dest_bucket_info.owner != bucket_info.owner) { + return -ERR_PRECONDITION_FAILED; + } + + rgw_obj dest_obj(dest_bucket_info.bucket, buf); + + if (dest_bucket_info.versioning_enabled()){ + gen_rand_obj_instance_name(&dest_obj); + } + + obj_ctx.set_atomic(dest_obj); + + rgw_zone_id no_zone; + + r = copy_obj(obj_ctx, + user, + NULL, /* req_info *info */ + no_zone, + dest_obj, + obj, + dest_bucket_info, + bucket_info, + bucket_info.placement_rule, + NULL, /* time_t *src_mtime */ + NULL, /* time_t *mtime */ + NULL, /* const time_t *mod_ptr */ + NULL, /* const time_t *unmod_ptr */ + false, /* bool high_precision_time */ + NULL, /* const char *if_match */ + NULL, /* const char *if_nomatch */ + RGWRados::ATTRSMOD_NONE, + true, /* bool copy_if_newer */ + state->attrset, + RGWObjCategory::Main, + 0, /* uint64_t olh_epoch */ + real_time(), /* time_t delete_at */ + NULL, /* string *version_id */ + NULL, /* string *ptag */ + NULL, /* string *petag */ + NULL, /* void (*progress_cb)(off_t, void *) */ + NULL, /* void *progress_data */ + dpp, + null_yield); + if (r == -ECANCELED || r == -ENOENT) { + /* Has 
already been overwritten, meaning another rgw process already + * copied it out */ + return 0; + } + + return r; +} + int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx, const rgw_user& user, RGWBucketInfo& bucket_info, @@ -2936,6 +3486,18 @@ int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx, handler); } +int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx, + const rgw_user& user, + RGWBucketInfo& bucket_info, + rgw_obj& obj, + bool& restored, /* out */ + const DoutPrefixProvider *dpp, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::swift_versioning_restore", parent_span); + return RGWRados::swift_versioning_restore(obj_ctx, user, bucket_info, obj, restored, dpp); + +} + int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_size, map& attrs, bool assume_noent, bool modify_tail, @@ -3245,6 +3807,13 @@ int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size, return r; } +int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size, + map& attrs, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::Object::Write::write_meta", parent_span); + return RGWRados::Object::Write::write_meta(size, accounted_size, attrs, y); +} + class RGWRadosPutObj : public RGWHTTPStreamRWRequest::ReceiveCB { CephContext* cct; @@ -4478,6 +5047,16 @@ int RGWRados::transition_obj(RGWObjectCtx& obj_ctx, int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info, optional_yield y) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rados.cc RGWRados::check_bucket_empty", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_rados.cc RGWRados::check_bucket_empty"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif constexpr uint 
NUM_ENTRIES = 1000u; rgw_obj_index_key marker; @@ -4514,23 +5093,73 @@ int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info, optional_yield y) return 0; } - -/** - * Delete a bucket. - * bucket: the name of the bucket to delete - * Returns 0 on success, -ERR# otherwise. - */ -int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, bool check_empty) + +int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) { - const rgw_bucket& bucket = bucket_info.bucket; - RGWSI_RADOS::Pool index_pool; - map bucket_objs; - int r = svc.bi_rados->open_bucket_index(bucket_info, std::nullopt, &index_pool, &bucket_objs, nullptr); - if (r < 0) - return r; - - if (check_empty) { - r = check_bucket_empty(bucket_info, y); + Span span = tracer.child_span("rgw_rados.cc RGWRados::check_bucket_empty", parent_span); + constexpr uint NUM_ENTRIES = 1000u; + + rgw_obj_index_key marker; + string prefix; + bool is_truncated; + + do { + std::vector ent_list; + ent_list.reserve(NUM_ENTRIES); + + int r = cls_bucket_list_unordered(bucket_info, + RGW_NO_SHARD, + marker, + prefix, + NUM_ENTRIES, + true, + ent_list, + &is_truncated, + &marker, + y, tracer, span); + if (r < 0) { + return r; + } + + string ns; + for (auto const& dirent : ent_list) { + rgw_obj_key obj; + + if (rgw_obj_key::oid_to_key_in_ns(dirent.key.name, &obj, ns)) { + return -ENOTEMPTY; + } + } + } while (is_truncated); + + return 0; +} + +/** + * Delete a bucket. + * bucket: the name of the bucket to delete + * Returns 0 on success, -ERR# otherwise. 
+ */ +int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, bool check_empty) +{ + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rados.cc RGWRados::delete_bucket", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_rados.cc RGWRados::delete_bucket"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + const rgw_bucket& bucket = bucket_info.bucket; + RGWSI_RADOS::Pool index_pool; + map bucket_objs; + int r = svc.bi_rados->open_bucket_index(bucket_info, std::nullopt, &index_pool, &bucket_objs, nullptr); + if (r < 0) + return r; + + if (check_empty) { + r = check_bucket_empty(bucket_info, y); if (r < 0) { return r; } @@ -4585,6 +5214,72 @@ int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& ob return 0; } +int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, Jager_Tracer& tracer, const Span& parent_span, bool check_empty) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::delete_bucket", parent_span); + const rgw_bucket& bucket = bucket_info.bucket; + RGWSI_RADOS::Pool index_pool; + map bucket_objs; + int r = svc.bi_rados->open_bucket_index(bucket_info, std::nullopt, &index_pool, &bucket_objs, nullptr); + if (r < 0) + return r; + + if (check_empty) { + r = check_bucket_empty(bucket_info, y); + if (r < 0) { + return r; + } + } + + bool remove_ep = true; + + if (objv_tracker.read_version.empty()) { + RGWBucketEntryPoint ep; + r = ctl.bucket->read_bucket_entrypoint_info(bucket_info.bucket, + &ep, + null_yield, + RGWBucketCtl::Bucket::GetParams() + .set_objv_tracker(&objv_tracker)); + if (r < 0 || + (!bucket_info.bucket.bucket_id.empty() && + ep.bucket.bucket_id != bucket_info.bucket.bucket_id)) { + if (r != -ENOENT) { + ldout(cct, 0) << "ERROR: read_bucket_entrypoint_info() 
bucket=" << bucket_info.bucket << " returned error: r=" << r << dendl; + /* we have no idea what caused the error, will not try to remove it */ + } + /* + * either failed to read bucket entrypoint, or it points to a different bucket instance than + * requested + */ + remove_ep = false; + } + } + + if (remove_ep) { + r = ctl.bucket->remove_bucket_entrypoint_info(bucket_info.bucket, null_yield, tracer, span, + RGWBucketCtl::Bucket::RemoveParams() + .set_objv_tracker(&objv_tracker)); + if (r < 0) + return r; + } + + /* if the bucket is not synced we can remove the meta file */ + if (!svc.zone->is_syncing_bucket_meta(bucket)) { + RGWObjVersionTracker objv_tracker; + r = ctl.bucket->remove_bucket_instance_info(bucket, bucket_info, null_yield, tracer, span); + if (r < 0) { + return r; + } + + /* remove bucket index objects asynchronously by best effort */ + (void) CLSRGWIssueBucketIndexClean(index_pool.ioctx(), + bucket_objs, + cct->_conf->rgw_bucket_index_max_aio)(); + } + + return 0; +} + int RGWRados::set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner) { RGWBucketInfo info; @@ -4684,6 +5379,12 @@ int RGWRados::Object::complete_atomic_modification() return 0; } +int RGWRados::Object::complete_atomic_modification(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc int RGWRados::Object::complete_atomic_modification", parent_span); + return RGWRados::Object::complete_atomic_modification(); +} + void RGWRados::update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain) { RGWObjManifest::obj_iterator iter; @@ -5077,98 +5778,294 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y) return 0; } -int RGWRados::delete_obj(RGWObjectCtx& obj_ctx, - const RGWBucketInfo& bucket_info, - const rgw_obj& obj, - int versioning_status, - uint16_t bilog_flags, - const real_time& expiration_time, - rgw_zone_set *zones_trace) +int RGWRados::Object::Delete::delete_obj(optional_yield y, Jager_Tracer& tracer, 
const Span& parent_span) { - RGWRados::Object del_target(this, bucket_info, obj_ctx, obj); - RGWRados::Object::Delete del_op(&del_target); - - del_op.params.bucket_owner = bucket_info.owner; - del_op.params.versioning_status = versioning_status; - del_op.params.bilog_flags = bilog_flags; - del_op.params.expiration_time = expiration_time; - del_op.params.zones_trace = zones_trace; - - return del_op.delete_obj(null_yield); -} + Span span = tracer.child_span("rgw_rados.cc RGWRados::Object::Delete::delete_obj", parent_span); + RGWRados *store = target->get_store(); + rgw_obj& src_obj = target->get_obj(); + const string& instance = src_obj.key.instance; + rgw_obj obj = src_obj; -int RGWRados::delete_raw_obj(const rgw_raw_obj& obj) -{ - rgw_rados_ref ref; - int r = get_raw_obj_ref(obj, &ref); - if (r < 0) { - return r; + if (instance == "null") { + obj.key.instance.clear(); } - ObjectWriteOperation op; + bool explicit_marker_version = (!params.marker_version_id.empty()); - op.remove(); - r = rgw_rados_operate(ref.pool.ioctx(), ref.obj.oid, &op, null_yield); - if (r < 0) - return r; + if (params.versioning_status & BUCKET_VERSIONED || explicit_marker_version) { + if (instance.empty() || explicit_marker_version) { + rgw_obj marker = obj; - return 0; -} + if (!params.marker_version_id.empty()) { + if (params.marker_version_id != "null") { + marker.key.set_instance(params.marker_version_id); + } + } else if ((params.versioning_status & BUCKET_VERSIONS_SUSPENDED) == 0) { + store->gen_rand_obj_instance_name(&marker); + } -int RGWRados::delete_obj_index(const rgw_obj& obj, ceph::real_time mtime) -{ - std::string oid, key; - get_obj_bucket_and_oid_loc(obj, oid, key); + result.version_id = marker.key.instance; + if (result.version_id.empty()) + result.version_id = "null"; + result.delete_marker = true; - auto obj_ctx = svc.sysobj->init_obj_ctx(); + struct rgw_bucket_dir_entry_meta meta; - RGWBucketInfo bucket_info; - int ret = get_bucket_instance_info(obj_ctx, obj.bucket, 
bucket_info, NULL, NULL, null_yield); - if (ret < 0) { - ldout(cct, 0) << "ERROR: " << __func__ << "() get_bucket_instance_info(bucket=" << obj.bucket << ") returned ret=" << ret << dendl; - return ret; - } + meta.owner = params.obj_owner.get_id().to_str(); + meta.owner_display_name = params.obj_owner.get_display_name(); - RGWRados::Bucket bop(this, bucket_info); - RGWRados::Bucket::UpdateIndex index_op(&bop, obj); + if (real_clock::is_zero(params.mtime)) { + meta.mtime = real_clock::now(); + } else { + meta.mtime = params.mtime; + } - return index_op.complete_del(-1 /* pool */, 0, mtime, NULL); -} + int r = store->set_olh(target->get_ctx(), target->get_bucket_info(), marker, true, &meta, params.olh_epoch, params.unmod_since, params.high_precision_time, y, params.zones_trace); + if (r < 0) { + return r; + } + } else { + rgw_bucket_dir_entry dirent; -static void generate_fake_tag(RGWRados *store, map& attrset, RGWObjManifest& manifest, bufferlist& manifest_bl, bufferlist& tag_bl) -{ - string tag; + int r = store->bi_get_instance(target->get_bucket_info(), obj, &dirent); + if (r < 0) { + return r; + } + result.delete_marker = dirent.is_delete_marker(); + r = store->unlink_obj_instance(target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, y, params.zones_trace); + if (r < 0) { + return r; + } + result.version_id = instance; + } - RGWObjManifest::obj_iterator mi = manifest.obj_begin(); - if (mi != manifest.obj_end()) { - if (manifest.has_tail()) // first object usually points at the head, let's skip to a more unique part - ++mi; - tag = mi.get_location().get_raw_obj(store).oid; - tag.append("_"); - } + BucketShard *bs; + int r = target->get_bucket_shard(&bs); + if (r < 0) { + ldout(store->ctx(), 5) << "failed to get BucketShard object: r=" << r << dendl; + return r; + } - unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE]; - char md5_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; - MD5 hash; - hash.Update((const unsigned char *)manifest_bl.c_str(), 
manifest_bl.length()); + r = store->svc.datalog_rados->add_entry(target->bucket_info, bs->shard_id); + if (r < 0) { + lderr(store->ctx()) << "ERROR: failed writing data log" << dendl; + return r; + } - map::iterator iter = attrset.find(RGW_ATTR_ETAG); - if (iter != attrset.end()) { - bufferlist& bl = iter->second; - hash.Update((const unsigned char *)bl.c_str(), bl.length()); + return 0; } - hash.Final(md5); - buf_to_hex(md5, CEPH_CRYPTO_MD5_DIGESTSIZE, md5_str); - tag.append(md5_str); + rgw_rados_ref ref; + int r = store->get_obj_head_ref(target->get_bucket_info(), obj, &ref, tracer, span); + if (r < 0) { + return r; + } - ldout(store->ctx(), 10) << "generate_fake_tag new tag=" << tag << dendl; + RGWObjState *state; + r = target->get_state(&state, false, y); + if (r < 0) + return r; - tag_bl.append(tag.c_str(), tag.size() + 1); -} + ObjectWriteOperation op; -static bool is_olh(map& attrs) -{ + if (!real_clock::is_zero(params.unmod_since)) { + struct timespec ctime = ceph::real_clock::to_timespec(state->mtime); + struct timespec unmod = ceph::real_clock::to_timespec(params.unmod_since); + if (!params.high_precision_time) { + ctime.tv_nsec = 0; + unmod.tv_nsec = 0; + } + + ldout(store->ctx(), 10) << "If-UnModified-Since: " << params.unmod_since << " Last-Modified: " << ctime << dendl; + if (ctime > unmod) { + return -ERR_PRECONDITION_FAILED; + } + + /* only delete object if mtime is less than or equal to params.unmod_since */ + store->cls_obj_check_mtime(op, params.unmod_since, params.high_precision_time, CLS_RGW_CHECK_TIME_MTIME_LE); + } + uint64_t obj_accounted_size = state->accounted_size; + + if(params.abortmp) { + obj_accounted_size = params.parts_accounted_size; + } + + if (!real_clock::is_zero(params.expiration_time)) { + bufferlist bl; + real_time delete_at; + + if (state->get_attr(RGW_ATTR_DELETE_AT, bl)) { + try { + auto iter = bl.cbegin(); + decode(delete_at, iter); + } catch (buffer::error& err) { + ldout(store->ctx(), 0) << "ERROR: couldn't decode 
RGW_ATTR_DELETE_AT" << dendl; + return -EIO; + } + + if (params.expiration_time != delete_at) { + return -ERR_PRECONDITION_FAILED; + } + } else { + return -ERR_PRECONDITION_FAILED; + } + } + + if (!state->exists) { + target->invalidate_state(); + return -ENOENT; + } + + r = target->prepare_atomic_modification(op, false, NULL, NULL, NULL, true, false, y, tracer, span); + if (r < 0) + return r; + + RGWBucketInfo& bucket_info = target->get_bucket_info(); + + RGWRados::Bucket bop(store, bucket_info); + RGWRados::Bucket::UpdateIndex index_op(&bop, obj); + + index_op.set_zones_trace(params.zones_trace); + index_op.set_bilog_flags(params.bilog_flags); + + r = index_op.prepare(CLS_RGW_OP_DEL, &state->write_tag, y, tracer , span); + if (r < 0) + return r; + + store->remove_rgw_head_obj(op); + + auto& ioctx = ref.pool.ioctx(); + r = rgw_rados_operate(ioctx, ref.obj.oid, &op, null_yield, tracer, span); + + /* raced with another operation, object state is indeterminate */ + const bool need_invalidate = (r == -ECANCELED); + + int64_t poolid = ioctx.get_id(); + if (r >= 0) { + tombstone_cache_t *obj_tombstone_cache = store->get_tombstone_cache(); + if (obj_tombstone_cache) { + tombstone_entry entry{*state}; + obj_tombstone_cache->add(obj, entry); + } + r = index_op.complete_del(poolid, ioctx.get_last_version(), state->mtime, params.remove_objs); + + int ret = target->complete_atomic_modification(tracer, span); + if (ret < 0) { + ldout(store->ctx(), 0) << "ERROR: complete_atomic_modification returned ret=" << ret << dendl; + } + /* other than that, no need to propagate error */ + } else { + int ret = index_op.cancel(); + if (ret < 0) { + ldout(store->ctx(), 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl; + } + } + + if (need_invalidate) { + target->invalidate_state(); + } + + if (r < 0) + return r; + + /* update quota cache */ + store->quota_handler->update_stats(params.bucket_owner, obj.bucket, -1, 0, obj_accounted_size); + + return 0; +} + +int 
RGWRados::delete_obj(RGWObjectCtx& obj_ctx, + const RGWBucketInfo& bucket_info, + const rgw_obj& obj, + int versioning_status, + uint16_t bilog_flags, + const real_time& expiration_time, + rgw_zone_set *zones_trace) +{ + RGWRados::Object del_target(this, bucket_info, obj_ctx, obj); + RGWRados::Object::Delete del_op(&del_target); + + del_op.params.bucket_owner = bucket_info.owner; + del_op.params.versioning_status = versioning_status; + del_op.params.bilog_flags = bilog_flags; + del_op.params.expiration_time = expiration_time; + del_op.params.zones_trace = zones_trace; + + return del_op.delete_obj(null_yield); +} + +int RGWRados::delete_raw_obj(const rgw_raw_obj& obj) +{ + rgw_rados_ref ref; + int r = get_raw_obj_ref(obj, &ref); + if (r < 0) { + return r; + } + + ObjectWriteOperation op; + + op.remove(); + r = rgw_rados_operate(ref.pool.ioctx(), ref.obj.oid, &op, null_yield); + if (r < 0) + return r; + + return 0; +} + +int RGWRados::delete_obj_index(const rgw_obj& obj, ceph::real_time mtime) +{ + std::string oid, key; + get_obj_bucket_and_oid_loc(obj, oid, key); + + auto obj_ctx = svc.sysobj->init_obj_ctx(); + + RGWBucketInfo bucket_info; + int ret = get_bucket_instance_info(obj_ctx, obj.bucket, bucket_info, NULL, NULL, null_yield); + if (ret < 0) { + ldout(cct, 0) << "ERROR: " << __func__ << "() get_bucket_instance_info(bucket=" << obj.bucket << ") returned ret=" << ret << dendl; + return ret; + } + + RGWRados::Bucket bop(this, bucket_info); + RGWRados::Bucket::UpdateIndex index_op(&bop, obj); + + return index_op.complete_del(-1 /* pool */, 0, mtime, NULL); +} + +static void generate_fake_tag(RGWRados *store, map& attrset, RGWObjManifest& manifest, bufferlist& manifest_bl, bufferlist& tag_bl) +{ + string tag; + + RGWObjManifest::obj_iterator mi = manifest.obj_begin(); + if (mi != manifest.obj_end()) { + if (manifest.has_tail()) // first object usually points at the head, let's skip to a more unique part + ++mi; + tag = mi.get_location().get_raw_obj(store).oid; + 
tag.append("_"); + } + + unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char md5_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + MD5 hash; + hash.Update((const unsigned char *)manifest_bl.c_str(), manifest_bl.length()); + + map::iterator iter = attrset.find(RGW_ATTR_ETAG); + if (iter != attrset.end()) { + bufferlist& bl = iter->second; + hash.Update((const unsigned char *)bl.c_str(), bl.length()); + } + + hash.Final(md5); + buf_to_hex(md5, CEPH_CRYPTO_MD5_DIGESTSIZE, md5_str); + tag.append(md5_str); + + ldout(store->ctx(), 10) << "generate_fake_tag new tag=" << tag << dendl; + + tag_bl.append(tag.c_str(), tag.size() + 1); +} + +static bool is_olh(map& attrs) +{ map::iterator iter = attrs.find(RGW_ATTR_OLH_INFO); return (iter != attrs.end()); } @@ -5414,6 +6311,12 @@ int RGWRados::Object::Read::get_attr(const char *name, bufferlist& dest, optiona return 0; } +int RGWRados::Object::Read::get_attr(const char *name, bufferlist& dest, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::Object::Read::get_attr", parent_span); + return RGWRados::Object::Read::get_attr(name, dest, y); +} + int RGWRados::Object::Stat::stat_async() { RGWObjectCtx& ctx = source->get_ctx(); @@ -5626,6 +6529,14 @@ int RGWRados::Object::prepare_atomic_modification(ObjectWriteOperation& op, bool return 0; } +int RGWRados::Object::prepare_atomic_modification(ObjectWriteOperation& op, bool reset_obj, const string *ptag, + const char *if_match, const char *if_nomatch, bool removal_op, + bool modify_tail, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::Object::prepare_atomic_modification", parent_span); + return RGWRados::Object::prepare_atomic_modification(op, reset_obj, ptag, if_match, if_nomatch, removal_op, modify_tail, y); +} + /** * Set an attr on an object. 
* bucket: name of the bucket holding the object @@ -5877,33 +6788,137 @@ int RGWRados::Object::Read::prepare(optional_yield y) return 0; } -int RGWRados::Object::Read::range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end) +int RGWRados::Object::Read::prepare(optional_yield y,Jager_Tracer& tracer, const Span& parent_span) { - if (ofs < 0) { - ofs += obj_size; - if (ofs < 0) - ofs = 0; - end = obj_size - 1; - } else if (end < 0) { - end = obj_size - 1; + Span span=tracer.child_span("rgw_rados.cc RGWRados::Object::Read::prepare",parent_span); + RGWRados *store = source->get_store(); + CephContext *cct = store->ctx(); + + bufferlist etag; + + map::iterator iter; + + RGWObjState *astate; + int r = source->get_state(&astate, true, y); + if (r < 0) + return r; + + if (!astate->exists) { + return -ENOENT; } - if (obj_size > 0) { - if (ofs >= (off_t)obj_size) { - return -ERANGE; - } - if (end >= (off_t)obj_size) { - end = obj_size - 1; + const RGWBucketInfo& bucket_info = source->get_bucket_info(); + + state.obj = astate->obj; + store->obj_to_raw(bucket_info.placement_rule, state.obj, &state.head_obj, tracer, span); + + state.cur_pool = state.head_obj.pool; + state.cur_ioctx = &state.io_ctxs[state.cur_pool]; + + r = store->get_obj_head_ioctx(bucket_info, state.obj, state.cur_ioctx, tracer, span); + if (r < 0) { + return r; + } + if (params.target_obj) { + *params.target_obj = state.obj; + } + if (params.attrs) { + *params.attrs = astate->attrset; + if (cct->_conf->subsys.should_gather()) { + for (iter = params.attrs->begin(); iter != params.attrs->end(); ++iter) { + ldout(cct, 20) << "Read xattr: " << iter->first << dendl; + } } } - return 0; -} -int RGWRados::Bucket::UpdateIndex::guard_reshard(BucketShard **pbs, std::function call) -{ - RGWRados *store = target->get_store(); - BucketShard *bs; - int r; + /* Convert all times go GMT to make them compatible */ + if (conds.mod_ptr || conds.unmod_ptr) { + obj_time_weight src_weight; + src_weight.init(astate); + 
src_weight.high_precision = conds.high_precision_time; + + obj_time_weight dest_weight; + dest_weight.high_precision = conds.high_precision_time; + + if (conds.mod_ptr && !conds.if_nomatch) { + dest_weight.init(*conds.mod_ptr, conds.mod_zone_id, conds.mod_pg_ver); + ldout(cct, 10) << "If-Modified-Since: " << dest_weight << " Last-Modified: " << src_weight << dendl; + if (!(dest_weight < src_weight)) { + return -ERR_NOT_MODIFIED; + } + } + + if (conds.unmod_ptr && !conds.if_match) { + dest_weight.init(*conds.unmod_ptr, conds.mod_zone_id, conds.mod_pg_ver); + ldout(cct, 10) << "If-UnModified-Since: " << dest_weight << " Last-Modified: " << src_weight << dendl; + if (dest_weight < src_weight) { + return -ERR_PRECONDITION_FAILED; + } + } + } + if (conds.if_match || conds.if_nomatch) { + r = get_attr(RGW_ATTR_ETAG, etag, y); + if (r < 0) + return r; + + if (conds.if_match) { + string if_match_str = rgw_string_unquote(conds.if_match); + ldout(cct, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-Match: " << if_match_str << dendl; + if (if_match_str.compare(0, etag.length(), etag.c_str(), etag.length()) != 0) { + return -ERR_PRECONDITION_FAILED; + } + } + + if (conds.if_nomatch) { + string if_nomatch_str = rgw_string_unquote(conds.if_nomatch); + ldout(cct, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-NoMatch: " << if_nomatch_str << dendl; + if (if_nomatch_str.compare(0, etag.length(), etag.c_str(), etag.length()) == 0) { + return -ERR_NOT_MODIFIED; + } + } + } + + if (params.obj_size) + *params.obj_size = astate->size; + if (params.lastmod) + *params.lastmod = astate->mtime; + + return 0; +} + +int RGWRados::Object::Read::range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end) +{ + if (ofs < 0) { + ofs += obj_size; + if (ofs < 0) + ofs = 0; + end = obj_size - 1; + } else if (end < 0) { + end = obj_size - 1; + } + + if (obj_size > 0) { + if (ofs >= (off_t)obj_size) { + return -ERANGE; + } + if (end >= (off_t)obj_size) { + 
end = obj_size - 1; + } + } + return 0; +} + +int RGWRados::Object::Read::range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::Object::Read::range_to_ofs", parent_span); + return RGWRados::Object::Read::range_to_ofs(obj_size, ofs, end); +} + +int RGWRados::Bucket::UpdateIndex::guard_reshard(BucketShard **pbs, std::function call) +{ + RGWRados *store = target->get_store(); + BucketShard *bs; + int r; #define NUM_RESHARD_RETRIES 10 for (int i = 0; i < NUM_RESHARD_RETRIES; ++i) { @@ -5974,6 +6989,12 @@ int RGWRados::Bucket::UpdateIndex::prepare(RGWModifyOp op, const string *write_t return 0; } +int RGWRados::Bucket::UpdateIndex::prepare(RGWModifyOp op, const string *write_tag, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::Bucket::UpdateIndex::prepare", parent_span); + return RGWRados::Bucket::UpdateIndex::prepare(op, write_tag, y); +} + int RGWRados::Bucket::UpdateIndex::complete(int64_t poolid, uint64_t epoch, uint64_t size, uint64_t accounted_size, ceph::real_time& ut, const string& etag, @@ -6332,6 +7353,13 @@ int RGWRados::Object::Read::iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb, return data.drain(); } +int RGWRados::Object::Read::iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb, + optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::Object::Read::iterate", parent_span); + return RGWRados::Object::Read::iterate(ofs, end, cb, y); +} + int RGWRados::iterate_obj(RGWObjectCtx& obj_ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, off_t ofs, off_t end, uint64_t max_chunk_size, @@ -7653,8 +8681,25 @@ int RGWRados::put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, real_t return 0; } +int RGWRados::put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, real_time mtime, obj_version 
*pep_objv, + map *pattrs, bool create_entry_point, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::put_linked_bucket_info", parent_span); + return RGWRados::put_linked_bucket_info(info, exclusive, mtime, pep_objv, pattrs, create_entry_point); +} + int RGWRados::update_containers_stats(map& m) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rados.cc RGWRados::update_containers_stats", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_rados.cc RGWRados::update_containers_stats"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif auto obj_ctx = svc.sysobj->init_obj_ctx(); map::iterator iter; @@ -7697,6 +8742,12 @@ int RGWRados::update_containers_stats(map& m) return m.size(); } +int RGWRados::update_containers_stats(map& m, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::update_containers_stats", parent_span); + return RGWRados::update_containers_stats(m); +} + int RGWRados::append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl) { rgw_rados_ref ref; @@ -8127,58 +9178,336 @@ int RGWRados::cls_obj_complete_del(BucketShard& bs, string& tag, bilog_flags, zones_trace); } -int RGWRados::cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace) -{ - rgw_bucket_dir_entry ent; - obj.key.get_index_key(&ent.key); - return cls_obj_complete_op(bs, obj, CLS_RGW_OP_CANCEL, tag, - -1 /* pool id */, 0, ent, - RGWObjCategory::None, NULL, bilog_flags, - zones_trace); -} +int RGWRados::cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace) +{ + rgw_bucket_dir_entry ent; + obj.key.get_index_key(&ent.key); + return cls_obj_complete_op(bs, obj, CLS_RGW_OP_CANCEL, tag, + -1 /* pool id */, 0, ent, + 
RGWObjCategory::None, NULL, bilog_flags, + zones_trace); +} + +int RGWRados::cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout) +{ + RGWSI_RADOS::Pool index_pool; + map bucket_objs; + int r = svc.bi_rados->open_bucket_index(bucket_info, std::nullopt, &index_pool, &bucket_objs, nullptr); + if (r < 0) + return r; + + return CLSRGWIssueSetTagTimeout(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)(); +} + + +uint32_t RGWRados::calc_ordered_bucket_list_per_shard(uint32_t num_entries, + uint32_t num_shards) +{ + // We want to minimize the chances that when num_shards >> + // num_entries that we return much fewer than num_entries to the + // client. Given all the overhead of making a cls call to the osd, + // returning a few entries is not much more work than returning one + // entry. This minimum might be better tuned based on future + // experiments where num_shards >> num_entries. (Note: ">>" should + // be interpreted as "much greater than".) + constexpr uint32_t min_read = 8; + + // The following is based on _"Balls into Bins" -- A Simple and + // Tight Analysis_ by Raab and Steger. We add 1 as a way to handle + // cases when num_shards >> num_entries (it almost serves as a + // ceiling calculation). We also assume alpha is 1.0 and extract it + // from the calculation. Future work could involve memoizing some of + // the transcendental functions to minimize repeatedly re-calling + // them with the same parameters, which we expect to be the case the + // majority of the time. 
+ uint32_t calc_read = + 1 + + static_cast((num_entries / num_shards) + + sqrt((2 * num_entries) * + log(num_shards) / num_shards)); + + return std::max(min_read, calc_read); +} + + +int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, + const int shard_id, + const rgw_obj_index_key& start_after, + const string& prefix, + const string& delimiter, + const uint32_t num_entries, + const bool list_versions, + const uint16_t expansion_factor, + ent_map_t& m, + bool* is_truncated, + bool* cls_filtered, + rgw_obj_index_key *last_entry, + optional_yield y, + check_filter_t force_check_filter) +{ + + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rados.cc RGWRados::cls_bucket_list_ordered", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_rados.cc RGWRados::cls_bucket_list_ordered"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + /* expansion_factor allows the number of entries to read to grow + * exponentially; this is used when earlier reads are producing too + * few results, perhaps due to filtering or to a series of + * namespaced entries */ + + ldout(cct, 10) << "RGWRados::" << __func__ << ": " << bucket_info.bucket << + " start_after=\"" << start_after.name << + "[" << start_after.instance << + "]\", prefix=\"" << prefix << + "\" num_entries=" << num_entries << + ", list_versions=" << list_versions << + ", expansion_factor=" << expansion_factor << dendl; + + m.clear(); + + RGWSI_RADOS::Pool index_pool; + // key - oid (for different shards if there is any) + // value - list result for the corresponding oid (shard), it is filled by + // the AIO callback + map shard_oids; + int r = svc.bi_rados->open_bucket_index(bucket_info, shard_id, + &index_pool, &shard_oids, + nullptr); + if (r < 0) { + return r; + } + + const uint32_t shard_count = shard_oids.size(); + uint32_t num_entries_per_shard; + if 
(expansion_factor == 0) { + num_entries_per_shard = + calc_ordered_bucket_list_per_shard(num_entries, shard_count); + } else if (expansion_factor <= 11) { + // we'll max out the exponential multiplication factor at 1024 (2<<10) + num_entries_per_shard = + std::min(num_entries, + (uint32_t(1 << (expansion_factor - 1)) * + calc_ordered_bucket_list_per_shard(num_entries, shard_count))); + } else { + num_entries_per_shard = num_entries; + } + + ldout(cct, 10) << "RGWRados::" << __func__ << + " request from each of " << shard_count << + " shard(s) for " << num_entries_per_shard << " entries to get " << + num_entries << " total entries" << dendl; + + auto& ioctx = index_pool.ioctx(); + map shard_list_results; + cls_rgw_obj_key start_after_key(start_after.name, start_after.instance); + r = CLSRGWIssueBucketList(ioctx, start_after_key, prefix, delimiter, + num_entries_per_shard, + list_versions, shard_oids, shard_list_results, + cct->_conf->rgw_bucket_index_max_aio)(); + if (r < 0) { + return r; + } + + // to manage the iterators through each shard's list results + struct ShardTracker { + const size_t shard_idx; + rgw_cls_list_ret& result; + const std::string& oid_name; + RGWRados::ent_map_t::iterator cursor; + RGWRados::ent_map_t::iterator end; + + // manages an iterator through a shard and provides other + // accessors + ShardTracker(size_t _shard_idx, + rgw_cls_list_ret& _result, + const std::string& _oid_name): + shard_idx(_shard_idx), + result(_result), + oid_name(_oid_name), + cursor(_result.dir.m.begin()), + end(_result.dir.m.end()) + {} + + inline const std::string& entry_name() const { + return cursor->first; + } + rgw_bucket_dir_entry& dir_entry() const { + return cursor->second; + } + inline bool is_truncated() const { + return result.is_truncated; + } + inline ShardTracker& advance() { + ++cursor; + // return a self-reference to allow for chaining of calls, such + // as x.advance().at_end() + return *this; + } + inline bool at_end() const { + return cursor == 
end; + } + }; // ShardTracker + + // add the next unique candidate, or return false if we reach the end + auto next_candidate = [] (ShardTracker& t, + std::map& candidates, + size_t tracker_idx) { + while (!t.at_end()) { + if (candidates.emplace(t.entry_name(), tracker_idx).second) { + return; + } + t.advance(); // skip duplicate common prefixes + } + }; + + // one tracker per shard requested (may not be all shards) + std::vector results_trackers; + results_trackers.reserve(shard_list_results.size()); + for (auto& r : shard_list_results) { + results_trackers.emplace_back(r.first, r.second, shard_oids[r.first]); + + // if any *one* shard's result is trucated, the entire result is + // truncated + *is_truncated = *is_truncated || r.second.is_truncated; + + // unless *all* are shards are cls_filtered, the entire result is + // not filtered + *cls_filtered = *cls_filtered && r.second.cls_filtered; + } + + // create a map to track the next candidate entry from ShardTracker + // (key=candidate, value=index into results_trackers); as we consume + // entries from shards, we replace them with the next entries in the + // shards until we run out + map candidates; + size_t tracker_idx = 0; + for (auto& t : results_trackers) { + // it's important that the values in the map refer to the index + // into the results_trackers vector, which may not be the same + // as the shard number (i.e., when not all shards are requested) + next_candidate(t, candidates, tracker_idx); + ++tracker_idx; + } + + rgw_bucket_dir_entry* + last_entry_visited = nullptr; // to set last_entry (marker) + map updates; + uint32_t count = 0; + while (count < num_entries && !candidates.empty()) { + r = 0; + // select the next entry in lexical order (first key in map); + // again tracker_idx is not necessarily shard number, but is index + // into results_trackers vector + tracker_idx = candidates.begin()->second; + auto& tracker = results_trackers.at(tracker_idx); + last_entry_visited = &tracker.dir_entry(); + 
const string& name = tracker.entry_name(); + rgw_bucket_dir_entry& dirent = tracker.dir_entry(); + + ldout(cct, 20) << "RGWRados::" << __func__ << " currently processing " << + dirent.key << " from shard " << tracker.shard_idx << dendl; + + const bool force_check = + force_check_filter && force_check_filter(dirent.key.name); + + if ((!dirent.exists && + !dirent.is_delete_marker() && + !dirent.is_common_prefix()) || + !dirent.pending_map.empty() || + force_check) { + /* there are uncommitted ops. We need to check the current + * state, and if the tags are old we need to do clean-up as + * well. */ + librados::IoCtx sub_ctx; + sub_ctx.dup(ioctx); + r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, + updates[tracker.oid_name], y); + if (r < 0 && r != -ENOENT) { + return r; + } + } else { + r = 0; + } + + if (r >= 0) { + ldout(cct, 10) << "RGWRados::" << __func__ << ": got " << + dirent.key.name << "[" << dirent.key.instance << "]" << dendl; + m[name] = std::move(dirent); + ++count; + } else { + ldout(cct, 10) << "RGWRados::" << __func__ << ": skipping " << + dirent.key.name << "[" << dirent.key.instance << "]" << dendl; + } + + // refresh the candidates map + candidates.erase(candidates.begin()); + tracker.advance(); -int RGWRados::cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout) -{ - RGWSI_RADOS::Pool index_pool; - map bucket_objs; - int r = svc.bi_rados->open_bucket_index(bucket_info, std::nullopt, &index_pool, &bucket_objs, nullptr); - if (r < 0) - return r; + next_candidate(tracker, candidates, tracker_idx); - return CLSRGWIssueSetTagTimeout(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)(); -} + if (tracker.at_end() && tracker.is_truncated()) { + // once we exhaust one shard that is truncated, we need to stop, + // as we cannot be certain that one of the next entries needs to + // come from that shard; S3 and swift protocols allow returning + // fewer than what was requested + break; + } + } // 
while we haven't provided requested # of result entries + // suggest updates if there are any + for (auto& miter : updates) { + if (miter.second.length()) { + ObjectWriteOperation o; + cls_rgw_suggest_changes(o, miter.second); + // we don't care if we lose suggested updates, send them off blindly + AioCompletion *c = + librados::Rados::aio_create_completion(nullptr, nullptr); + ioctx.aio_operate(miter.first, c, &o); + c->release(); + } + } // updates loop -uint32_t RGWRados::calc_ordered_bucket_list_per_shard(uint32_t num_entries, - uint32_t num_shards) -{ - // We want to minimize the chances that when num_shards >> - // num_entries that we return much fewer than num_entries to the - // client. Given all the overhead of making a cls call to the osd, - // returning a few entries is not much more work than returning one - // entry. This minimum might be better tuned based on future - // experiments where num_shards >> num_entries. (Note: ">>" should - // be interpreted as "much greater than".) - constexpr uint32_t min_read = 8; + // determine truncation by checking if all the returned entries are + // consumed or not + *is_truncated = false; + for (const auto& t : results_trackers) { + if (!t.at_end() || t.is_truncated()) { + *is_truncated = true; + break; + } + } - // The following is based on _"Balls into Bins" -- A Simple and - // Tight Analysis_ by Raab and Steger. We add 1 as a way to handle - // cases when num_shards >> num_entries (it almost serves as a - // ceiling calculation). We also assume alpha is 1.0 and extract it - // from the calculation. Future work could involve memoizing some of - // the transcendental functions to minimize repeatedly re-calling - // them with the same parameters, which we expect to be the case the - // majority of the time. 
- uint32_t calc_read = - 1 + - static_cast((num_entries / num_shards) + - sqrt((2 * num_entries) * - log(num_shards) / num_shards)); + ldout(cct, 20) << "RGWRados::" << __func__ << + ": returning, count=" << count << ", is_truncated=" << *is_truncated << + dendl; - return std::max(min_read, calc_read); + if (*is_truncated && count < num_entries) { + ldout(cct, 10) << "RGWRados::" << __func__ << + ": INFO requested " << num_entries << " entries but returning " << + count << ", which is truncated" << dendl; + } + + if (last_entry_visited != nullptr && last_entry) { + // since we'll not need this any more, might as well move it... + *last_entry = std::move(last_entry_visited->key); + ldout(cct, 20) << "RGWRados::" << __func__ << + ": returning, last_entry=" << *last_entry << dendl; + } else { + ldout(cct, 20) << "RGWRados::" << __func__ << + ": returning, last_entry NOT SET" << dendl; + } + + return 0; } + int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, const int shard_id, const rgw_obj_index_key& start_after, @@ -8192,6 +9521,7 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, bool* cls_filtered, rgw_obj_index_key *last_entry, optional_yield y, + Jager_Tracer& tracer, const Span& parent_span, check_filter_t force_check_filter) { /* expansion_factor allows the number of entries to read to grow @@ -8208,7 +9538,7 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, ", expansion_factor=" << expansion_factor << dendl; m.clear(); - + Span span = tracer.child_span("rgw_rados.cc RGWRados::cls_bucket_list_ordered", parent_span); RGWSI_RADOS::Pool index_pool; // key - oid (for different shards if there is any) // value - list result for the corresponding oid (shard), it is filled by @@ -8445,6 +9775,8 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, } + + int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id, const rgw_obj_index_key& start_after, @@ -8456,6 +9788,16 @@ int 
RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info, rgw_obj_index_key *last_entry, optional_yield y, check_filter_t force_check_filter) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rados.cc RGWRados::cls_bucket_list_unordered", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_rados.cc RGWRados::cls_bucket_list_unordered"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif ldout(cct, 10) << "cls_bucket_list_unordered " << bucket_info.bucket << " start_after " << start_after.name << "[" << start_after.instance << "] num_entries " << num_entries << dendl; @@ -8602,6 +9944,164 @@ int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info, return 0; } // RGWRados::cls_bucket_list_unordered +int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info, + int shard_id, + const rgw_obj_index_key& start_after, + const string& prefix, + uint32_t num_entries, + bool list_versions, + std::vector& ent_list, + bool *is_truncated, + rgw_obj_index_key *last_entry, + optional_yield y, Jager_Tracer& tracer, const Span& parent_span, + check_filter_t force_check_filter ) +{ + ldout(cct, 10) << "cls_bucket_list_unordered " << bucket_info.bucket << + " start_after " << start_after.name << "[" << start_after.instance << + "] num_entries " << num_entries << dendl; + Span span = tracer.child_span("rgw_rados.cc RGWRados::cls_bucket_list_unordered", parent_span); + ent_list.clear(); + static MultipartMetaFilter multipart_meta_filter; + + *is_truncated = false; + RGWSI_RADOS::Pool index_pool; + + map oids; + int r = svc.bi_rados->open_bucket_index(bucket_info, shard_id, &index_pool, &oids, nullptr, tracer, span); + if (r < 0) + return r; + + auto& ioctx = index_pool.ioctx(); + + const uint32_t num_shards = oids.size(); + + rgw_obj_index_key marker = start_after; + uint32_t current_shard; + if (shard_id >= 0) { + 
current_shard = shard_id; + } else if (start_after.empty()) { + current_shard = 0u; + } else { + // at this point we have a marker (start_after) that has something + // in it, so we need to get to the bucket shard index, so we can + // start reading from there + + std::string key; + // test whether object name is a multipart meta name + if(! multipart_meta_filter.filter(start_after.name, key)) { + // if multipart_meta_filter fails, must be "regular" (i.e., + // unadorned) and the name is the key + key = start_after.name; + } + + // now convert the key (oid) to an rgw_obj_key since that will + // separate out the namespace, name, and instance + rgw_obj_key obj_key; + bool parsed = rgw_obj_key::parse_raw_oid(key, &obj_key); + if (!parsed) { + ldout(cct, 0) << + "ERROR: RGWRados::cls_bucket_list_unordered received an invalid " + "start marker: '" << start_after << "'" << dendl; + return -EINVAL; + } else if (obj_key.name.empty()) { + // if the name is empty that means the object name came in with + // a namespace only, and therefore we need to start our scan at + // the first bucket index shard + current_shard = 0u; + } else { + // so now we have the key used to compute the bucket index shard + // and can extract the specific shard from it + current_shard = svc.bi_rados->bucket_shard_index(obj_key.name, num_shards); + } + } + + uint32_t count = 0u; + map updates; + rgw_obj_index_key last_added_entry; + while (count <= num_entries && + ((shard_id >= 0 && current_shard == uint32_t(shard_id)) || + current_shard < num_shards)) { + const std::string& oid = oids[current_shard]; + rgw_cls_list_ret result; + + librados::ObjectReadOperation op; + string empty_delimiter; + cls_rgw_bucket_list_op(op, marker, prefix, empty_delimiter, + num_entries, + list_versions, &result); + r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield, tracer, span); + if (r < 0) + return r; + + for (auto& entry : result.dir.m) { + rgw_bucket_dir_entry& dirent = entry.second; + + bool force_check 
= force_check_filter && + force_check_filter(dirent.key.name); + if ((!dirent.exists && !dirent.is_delete_marker()) || + !dirent.pending_map.empty() || + force_check) { + /* there are uncommitted ops. We need to check the current state, + * and if the tags are old we need to do cleanup as well. */ + librados::IoCtx sub_ctx; + sub_ctx.dup(ioctx); + r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[oid], y); + if (r < 0 && r != -ENOENT) { + return r; + } + } else { + r = 0; + } + + // at this point either r >=0 or r == -ENOENT + if (r >= 0) { // i.e., if r != -ENOENT + ldout(cct, 10) << "RGWRados::cls_bucket_list_unordered: got " << + dirent.key.name << "[" << dirent.key.instance << "]" << dendl; + + if (count < num_entries) { + marker = last_added_entry = dirent.key; // double assign + ent_list.emplace_back(std::move(dirent)); + ++count; + } else { + *is_truncated = true; + goto check_updates; + } + } else { // r == -ENOENT + // in the case of -ENOENT, make sure we're advancing marker + // for possible next call to CLSRGWIssueBucketList + marker = dirent.key; + } + } // entry for loop + + if (!result.is_truncated) { + // if we reached the end of the shard read next shard + ++current_shard; + marker = rgw_obj_index_key(); + } + } // shard loop + +check_updates: + + // suggest updates if there is any + map::iterator miter = updates.begin(); + for (; miter != updates.end(); ++miter) { + if (miter->second.length()) { + ObjectWriteOperation o; + cls_rgw_suggest_changes(o, miter->second); + // we don't care if we lose suggested updates, send them off blindly + AioCompletion *c = librados::Rados::aio_create_completion(nullptr, nullptr); + ioctx.aio_operate(miter->first, c, &o); + c->release(); + } + } + + if (last_entry && !ent_list.empty()) { + *last_entry = last_added_entry; + } + + return 0; +} + int RGWRados::cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info) @@ -8953,6 +10453,13 @@ int RGWRados::check_quota(const rgw_user& 
bucket_owner, rgw_bucket& bucket, return quota_handler->check_quota(bucket_owner, bucket, user_quota, bucket_quota, 1, obj_size); } +int RGWRados::check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket, + RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, Jager_Tracer& tracer, const Span& parent_span, bool check_size_only) +{ + Span span = tracer.child_span("rgw_rados.cc RGWRados::check_quota", parent_span); + return RGWRados::check_quota(bucket_owner, bucket, user_quota, bucket_quota, obj_size, check_size_only); +} + int RGWRados::get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id) { diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index e19b8d44811e6..4b75cfe039cea 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -9,6 +9,7 @@ #include "include/rados/librados.hpp" #include "include/Context.h" +#include "include/tracer.h" #include "common/RefCountedObj.h" #include "common/RWLock.h" #include "common/ceph_time.h" @@ -461,7 +462,9 @@ class RGWRados uint32_t bucket_index_max_shards; int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx); + int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx, Jager_Tracer&, const Span&); int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref); + int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref, Jager_Tracer&, const Span&); int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref); uint64_t max_bucket_id; @@ -596,7 +599,9 @@ class RGWRados int get_required_alignment(const rgw_pool& pool, uint64_t *alignment); void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size); + void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size, Jager_Tracer&, const Span&); int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, 
uint64_t *palignment = nullptr); + int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, Jager_Tracer&, const Span&, uint64_t *palignment = nullptr); int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr); uint32_t get_max_bucket_shards() { @@ -654,9 +659,12 @@ class RGWRados int create_pool(const rgw_pool& pool); void create_bucket_id(string *bucket_id); + void create_bucket_id(string *bucket_id, Jager_Tracer&, const Span&); bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool); + bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool, Jager_Tracer&, const Span&); bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj); + bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj, Jager_Tracer&, const Span&); int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket, const string& zonegroup_id, @@ -671,6 +679,20 @@ class RGWRados rgw_bucket *master_bucket, uint32_t *master_num_shards, bool exclusive = true); + int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket, + const string& zonegroup_id, + const rgw_placement_rule& placement_rule, + const string& swift_ver_location, + const RGWQuotaInfo * pquota_info, + map& attrs, + RGWBucketInfo& bucket_info, + obj_version *pobjv, + obj_version *pep_objv, + ceph::real_time creation_time, + rgw_bucket *master_bucket, + uint32_t *master_num_shards, + Jager_Tracer&, const Span&, + bool exclusive = true); RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; } @@ -707,7 +729,10 @@ class RGWRados int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag, const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail, optional_yield y); + int 
prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag, + const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail, optional_yield y, Jager_Tracer&, const Span&); int complete_atomic_modification(); + int complete_atomic_modification(Jager_Tracer&, const Span&); public: Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info), @@ -780,10 +805,14 @@ class RGWRados explicit Read(RGWRados::Object *_source) : source(_source) {} int prepare(optional_yield y); + int prepare(optional_yield y,Jager_Tracer&, const Span&); static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end); + static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end, Jager_Tracer&, const Span&); int read(int64_t ofs, int64_t end, bufferlist& bl, optional_yield y); int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb, optional_yield y); + int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb, optional_yield y, Jager_Tracer&, const Span&); int get_attr(const char *name, bufferlist& dest, optional_yield y); + int get_attr(const char *name, bufferlist& dest, optional_yield y, Jager_Tracer&, const Span&); }; struct Write { @@ -825,6 +854,8 @@ class RGWRados void *index_op, optional_yield y); int write_meta(uint64_t size, uint64_t accounted_size, map& attrs, optional_yield y); + int write_meta(uint64_t size, uint64_t accounted_size, + map& attrs, optional_yield y, Jager_Tracer&, const Span&); int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive); const req_state* get_req_state() { return (req_state *)target->get_ctx().get_private(); @@ -863,6 +894,7 @@ class RGWRados explicit Delete(RGWRados::Object *_target) : target(_target) {} int delete_obj(optional_yield y); + int delete_obj(optional_yield y, Jager_Tracer&, const Span&); }; struct Stat { @@ -968,6 +1000,7 @@ class RGWRados } int prepare(RGWModifyOp, 
const string *write_tag, optional_yield y); + int prepare(RGWModifyOp, const string *write_tag, optional_yield y, Jager_Tracer&, const Span&); int complete(int64_t poolid, uint64_t epoch, uint64_t size, uint64_t accounted_size, ceph::real_time& ut, const string& etag, const string& content_type, @@ -998,11 +1031,21 @@ class RGWRados map *common_prefixes, bool *is_truncated, optional_yield y); + int list_objects_ordered(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated, Jager_Tracer&, const Span&, + optional_yield y); int list_objects_unordered(int64_t max, vector *result, map *common_prefixes, bool *is_truncated, optional_yield y); + int list_objects_unordered(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated, Jager_Tracer&, const Span&, + optional_yield y); public: @@ -1040,6 +1083,20 @@ class RGWRados is_truncated, y); } } + + int list_objects(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated,Jager_Tracer& tracer, const Span& parent_span, + optional_yield y) { + if (params.allow_unordered) { + return list_objects_unordered(max, result, common_prefixes, + is_truncated, tracer, parent_span, y); + } else { + return list_objects_ordered(max, result, common_prefixes, + is_truncated, tracer, parent_span, y); + } + } rgw_obj_key& get_next_marker() { return next_marker; } @@ -1061,13 +1118,26 @@ class RGWRados RGWBucketInfo& bucket_info, /* in */ rgw_obj& obj, /* in */ const DoutPrefixProvider *dpp, /* in/out */ - optional_yield y); /* in */ + optional_yield y); + int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */ + const rgw_user& user, /* in */ + RGWBucketInfo& bucket_info, /* in */ + rgw_obj& obj, /* in */ + const DoutPrefixProvider *dpp, /* in/out */ + optional_yield y, + Jager_Tracer&, const Span&); /* in */ int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */ const rgw_user& user, /* in */ RGWBucketInfo& bucket_info, /* in */ rgw_obj& obj, /* in */ bool& restored, 
/* out */ - const DoutPrefixProvider *dpp); /* in/out */ + const DoutPrefixProvider *dpp);/* in/out */ + int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */ + const rgw_user& user, /* in */ + RGWBucketInfo& bucket_info, /* in */ + rgw_obj& obj, /* in */ + bool& restored, /* out */ + const DoutPrefixProvider *dpp, Jager_Tracer&, const Span&); int copy_obj_to_remote_dest(RGWObjState *astate, map& src_attrs, RGWRados::Object::Read& read_op, @@ -1200,6 +1270,7 @@ class RGWRados optional_yield y); int check_bucket_empty(RGWBucketInfo& bucket_info, optional_yield y); + int check_bucket_empty(RGWBucketInfo& bucket_info, optional_yield y, Jager_Tracer&, const Span&); /** * Delete a bucket. @@ -1207,6 +1278,7 @@ class RGWRados * Returns 0 on success, -ERR# otherwise. */ int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, bool check_empty = true); + int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, Jager_Tracer&, const Span&, bool check_empty = true); void wakeup_meta_sync_shards(set& shard_ids); void wakeup_data_sync_shards(const rgw_zone_id& source_zone, map >& shard_ids); @@ -1323,6 +1395,7 @@ class RGWRados void gen_rand_obj_instance_name(rgw_obj *target); int update_containers_stats(map& m); + int update_containers_stats(map& m, Jager_Tracer&, const Span&); int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl); public: @@ -1361,6 +1434,8 @@ class RGWRados int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv, map *pattrs, bool create_entry_point); + int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv, + map *pattrs, bool create_entry_point, Jager_Tracer&, const Span&); int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, optional_yield y, rgw_zone_set *zones_trace = nullptr); int 
cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch, @@ -1391,6 +1466,23 @@ class RGWRados rgw_obj_index_key *last_entry, optional_yield y, check_filter_t force_check_filter = nullptr); + + int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, + const int shard_id, + const rgw_obj_index_key& start_after, + const string& prefix, + const string& delimiter, + const uint32_t num_entries, + const bool list_versions, + const uint16_t exp_factor, // 0 means ignore + ent_map_t& m, + bool* is_truncated, + bool* cls_filtered, + rgw_obj_index_key *last_entry, + optional_yield y, + Jager_Tracer&, const Span&, + check_filter_t force_check_filter = nullptr); + int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id, const rgw_obj_index_key& start_after, @@ -1402,6 +1494,17 @@ class RGWRados rgw_obj_index_key *last_entry, optional_yield y, check_filter_t = nullptr); + int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, + int shard_id, + const rgw_obj_index_key& start_after, + const string& prefix, + uint32_t num_entries, + bool list_versions, + vector& ent_list, + bool *is_truncated, + rgw_obj_index_key *last_entry, + optional_yield y, Jager_Tracer&, const Span&, + check_filter_t = nullptr); int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector& headers, map *bucket_instance_ids = NULL); int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio); @@ -1461,6 +1564,8 @@ class RGWRados int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket, RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false); + int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket, + RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, Jager_Tracer&, const Span&, bool check_size_only = false); int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& 
bucket, uint64_t num_objs); diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc index 2816945d91d32..d51ae734c66a5 100644 --- a/src/rgw/rgw_rest.cc +++ b/src/rgw/rgw_rest.cc @@ -213,7 +213,7 @@ void rgw_rest_init(CephContext *cct, const RGWZoneGroup& zone_group) /* TODO: We should have a sanity check that no hostname matches the end of * any other hostname, otherwise we will get ambigious results from * rgw_find_host_in_domains. - * Eg: + * Eg: * Hostnames: [A, B.A] * Inputs: [Z.A, X.B.A] * Z.A clearly splits to subdomain=Z, domain=Z @@ -408,6 +408,16 @@ void dump_etag(struct req_state* const s, void dump_bucket_from_state(struct req_state *s) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest.cc dump_bucket_from_state", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_rest.cc dump_bucket_from_state"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (g_conf()->rgw_expose_bucket && ! s->bucket_name.empty()) { if (! 
s->bucket_tenant.empty()) { dump_header(s, "Bucket", @@ -418,6 +428,12 @@ void dump_bucket_from_state(struct req_state *s) } } +void dump_bucket_from_state(struct req_state *s, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest.cc dump_bucket_from_state", parent_span); + dump_bucket_from_state(s); +} + void dump_redirect(struct req_state * const s, const std::string& redirect) { return dump_header_if_nonempty(s, "Location", redirect); @@ -633,7 +649,7 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type, static void build_redirect_url(req_state *s, const string& redirect_base, string *redirect_url) { string& dest_uri = *redirect_url; - + dest_uri = redirect_base; /* * reqest_uri is always start with slash, so we need to remove @@ -805,6 +821,12 @@ int RGWGetObj_ObjStore::get_params() return 0; } +int RGWGetObj_ObjStore::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest.cc RGWGetObj_ObjStore::get_params", parent_span); + return RGWGetObj_ObjStore::get_params(); +} + int RESTArgs::get_string(struct req_state *s, const string& name, const string& def_val, string *val, bool *existed) { @@ -1030,6 +1052,12 @@ int RGWPutObj_ObjStore::get_params() return 0; } +int RGWPutObj_ObjStore::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest.cc RGWPutObj_ObjStore::get_params", parent_span); + return RGWPutObj_ObjStore::get_params(); +} + int RGWPutObj_ObjStore::get_data(bufferlist& bl) { size_t cl; @@ -1065,6 +1093,12 @@ int RGWPutObj_ObjStore::get_data(bufferlist& bl) return len; } +int RGWPutObj_ObjStore::get_data(bufferlist& bl,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_rest.cc RGWPutObj_ObjStore::get_data",parent_span); + return RGWPutObj_ObjStore::get_data(bl); +} + /* * parses params in the format: 'first; param1=foo; param2=bar' @@ -1519,7 +1553,7 @@ std::tuple 
rgw_rest_read_all_input(struct req_state *s, } bufferptr bp(cl + 1); - + len = recv_body(s, bp.c_str(), cl); if (len < 0) { return std::make_tuple(len, std::move(bl)); @@ -1578,7 +1612,7 @@ int RGWListMultipart_ObjStore::get_params() return op_ret; } } - + string str = s->info.args.get("max-parts"); op_ret = parse_value_and_bound(str, max_parts, 0, g_conf().get_val("rgw_max_listing_results"), @@ -1693,7 +1727,7 @@ int RGWHandler_REST::allocate_formatter(struct req_state *s, int default_type, bool configurable) { - s->format = -1; // set to invalid value to allocation happens anyway + s->format = -1; // set to invalid value to allocation happens anyway auto type = default_type; if (configurable) { string format_str = s->info.args.get("format"); @@ -1842,8 +1876,48 @@ static http_op op_from_method(const char *method) return OP_UNKNOWN; } +int RGWHandler_REST::init_permissions(RGWOp* op,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_rest.cc RGWHandler_REST::init_permissions()",parent_span); + if (op->get_type() == RGW_OP_CREATE_BUCKET) { + // We don't need user policies in case of STS token returned by AssumeRole, hence the check for user type + if (! s->user->get_id().empty() && s->auth.identity->get_identity_type() != TYPE_ROLE) { + try { + map uattrs; + if (auto ret = store->ctl()->user->get_attrs_by_uid(s->user->get_id(), &uattrs, null_yield); ! ret) { + if (s->iam_user_policies.empty()) { + s->iam_user_policies = get_iam_user_policy_from_attr(s->cct, store, uattrs, s->user->get_tenant()); + } else { + // This scenario can happen when a STS token has a policy, then we need to append other user policies + // to the existing ones. (e.g. 
token returned by GetSessionToken) + auto user_policies = get_iam_user_policy_from_attr(s->cct, store, uattrs, s->user->get_tenant()); + s->iam_user_policies.insert(s->iam_user_policies.end(), user_policies.begin(), user_policies.end()); + } + } + } catch (const std::exception& e) { + lderr(s->cct) << "Error reading IAM User Policy: " << e.what() << dendl; + } + } + rgw_build_iam_environment(store, s); + return 0; + } + + return do_init_permissions(tracer, span); +} + int RGWHandler_REST::init_permissions(RGWOp* op) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest.cc RGWHandler_REST::init_permissions", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_rest.cc RGWHandler_REST::init_permissions"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + if (op->get_type() == RGW_OP_CREATE_BUCKET) { // We don't need user policies in case of STS token returned by AssumeRole, hence the check for user type if (! s->user->get_id().empty() && s->auth.identity->get_identity_type() != TYPE_ROLE) { @@ -1870,8 +1944,63 @@ int RGWHandler_REST::init_permissions(RGWOp* op) return do_init_permissions(); } +int RGWHandler_REST::read_permissions(RGWOp* op_obj,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_rest.cc RGWHandler_REST::read_permissions()",parent_span); + bool only_bucket = false; + + switch (s->op) { + case OP_HEAD: + case OP_GET: + only_bucket = false; + break; + case OP_PUT: + case OP_POST: + case OP_COPY: + /* is it a 'multi-object delete' request? */ + if (s->info.args.exists("delete")) { + only_bucket = true; + break; + } + if (is_obj_update_op()) { + only_bucket = false; + break; + } + /* is it a 'create bucket' request? 
*/ + if (op_obj->get_type() == RGW_OP_CREATE_BUCKET) + return 0; + only_bucket = true; + break; + case OP_DELETE: + if (!s->info.args.exists("tagging")){ + only_bucket = true; + } + break; + case OP_OPTIONS: + only_bucket = true; + break; + default: + return -EINVAL; + } + + return do_read_permissions(op_obj, only_bucket, tracer, span); +} + + + + int RGWHandler_REST::read_permissions(RGWOp* op_obj) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest.cc RGWHandler_REST::read_permissions", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_rest.cc RGWHandler_REST::read_permissions"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif bool only_bucket = false; switch (s->op) { @@ -2073,10 +2202,275 @@ int RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio) } ldout(s->cct, 20) - << "subdomain=" << subdomain - << " domain=" << domain - << " in_hosted_domain=" << in_hosted_domain - << " in_hosted_domain_s3website=" << in_hosted_domain_s3website + << "subdomain=" << subdomain + << " domain=" << domain + << " in_hosted_domain=" << in_hosted_domain + << " in_hosted_domain_s3website=" << in_hosted_domain_s3website + << dendl; + + if (g_conf()->rgw_resolve_cname + && !in_hosted_domain + && !in_hosted_domain_s3website) { + string cname; + bool found; + int r = rgw_resolver->resolve_cname(info.host, cname, &found); + if (r < 0) { + ldout(s->cct, 0) + << "WARNING: rgw_resolver->resolve_cname() returned r=" << r + << dendl; + } + + if (found) { + ldout(s->cct, 5) << "resolved host cname " << info.host << " -> " + << cname << dendl; + in_hosted_domain = + rgw_find_host_in_domains(cname, &domain, &subdomain, hostnames_set); + + if (s3website_enabled + && !in_hosted_domain_s3website) { + in_hosted_domain_s3website = + rgw_find_host_in_domains(cname, &s3website_domain, + &s3website_subdomain, + hostnames_s3website_set); + if 
(in_hosted_domain_s3website) { + in_hosted_domain = true; // TODO: should hostnames be a + // strict superset of hostnames_s3website? + domain = s3website_domain; + subdomain = s3website_subdomain; + } + } + + ldout(s->cct, 20) + << "subdomain=" << subdomain + << " domain=" << domain + << " in_hosted_domain=" << in_hosted_domain + << " in_hosted_domain_s3website=" << in_hosted_domain_s3website + << dendl; + } + } + + // Handle A/CNAME records that point to the RGW storage, but do match the + // CNAME test above, per issue http://tracker.ceph.com/issues/15975 + // If BOTH domain & subdomain variables are empty, then none of the above + // cases matched anything, and we should fall back to using the Host header + // directly as the bucket name. + // As additional checks: + // - if the Host header is an IP, we're using path-style access without DNS + // - Also check that the Host header is a valid bucket name before using it. + // - Don't enable virtual hosting if no hostnames are configured + if (subdomain.empty() + && (domain.empty() || domain != info.host) + && !looks_like_ip_address(info.host.c_str()) + && RGWHandler_REST::validate_bucket_name(info.host) == 0 + && !(hostnames_set.empty() && hostnames_s3website_set.empty())) { + subdomain.append(info.host); + in_hosted_domain = 1; + } + + if (s3website_enabled && api_priority_s3website > api_priority_s3) { + in_hosted_domain_s3website = 1; + } + + if (in_hosted_domain_s3website) { + s->prot_flags |= RGW_REST_WEBSITE; + } + + + if (in_hosted_domain && !subdomain.empty()) { + string encoded_bucket = "/"; + encoded_bucket.append(subdomain); + if (s->info.request_uri[0] != '/') + encoded_bucket.append("/"); + encoded_bucket.append(s->info.request_uri); + s->info.request_uri = encoded_bucket; + } + + if (!domain.empty()) { + s->info.domain = domain; + } + + ldout(s->cct, 20) + << "final domain/bucket" + << " subdomain=" << subdomain + << " domain=" << domain + << " in_hosted_domain=" << in_hosted_domain + << " 
in_hosted_domain_s3website=" << in_hosted_domain_s3website + << " s->info.domain=" << s->info.domain + << " s->info.request_uri=" << s->info.request_uri + << dendl; + } + + if (s->info.domain.empty()) { + s->info.domain = s->cct->_conf->rgw_dns_name; + } + + s->decoded_uri = url_decode(s->info.request_uri); + /* Validate for being free of the '\0' buried in the middle of the string. */ + if (std::strlen(s->decoded_uri.c_str()) != s->decoded_uri.length()) { + return -ERR_ZERO_IN_URL; + } + + /* FastCGI specification, section 6.3 + * http://www.fastcgi.com/devkit/doc/fcgi-spec.html#S6.3 + * === + * The Authorizer application receives HTTP request information from the Web + * server on the FCGI_PARAMS stream, in the same format as a Responder. The + * Web server does not send CONTENT_LENGTH, PATH_INFO, PATH_TRANSLATED, and + * SCRIPT_NAME headers. + * === + * Ergo if we are in Authorizer role, we MUST look at HTTP_CONTENT_LENGTH + * instead of CONTENT_LENGTH for the Content-Length. + * + * There is one slight wrinkle in this, and that's older versions of + * nginx/lighttpd/apache setting BOTH headers. As a result, we have to check + * both headers and can't always simply pick A or B. + */ + const char* content_length = info.env->get("CONTENT_LENGTH"); + const char* http_content_length = info.env->get("HTTP_CONTENT_LENGTH"); + if (!http_content_length != !content_length) { + /* Easy case: one or the other is missing */ + s->length = (content_length ? 
content_length : http_content_length); + } else if (s->cct->_conf->rgw_content_length_compat && + content_length && http_content_length) { + /* Hard case: Both are set, we have to disambiguate */ + int64_t content_length_i, http_content_length_i; + + content_length_i = parse_content_length(content_length); + http_content_length_i = parse_content_length(http_content_length); + + // Now check them: + if (http_content_length_i < 0) { + // HTTP_CONTENT_LENGTH is invalid, ignore it + } else if (content_length_i < 0) { + // CONTENT_LENGTH is invalid, and HTTP_CONTENT_LENGTH is valid + // Swap entries + content_length = http_content_length; + } else { + // both CONTENT_LENGTH and HTTP_CONTENT_LENGTH are valid + // Let's pick the larger size + if (content_length_i < http_content_length_i) { + // prefer the larger value + content_length = http_content_length; + } + } + s->length = content_length; + // End of: else if (s->cct->_conf->rgw_content_length_compat && + // content_length && + // http_content_length) + } else { + /* no content length was defined */ + s->length = NULL; + } + + if (s->length) { + if (*s->length == '\0') { + s->content_length = 0; + } else { + string err; + s->content_length = strict_strtoll(s->length, 10, &err); + if (!err.empty()) { + ldout(s->cct, 10) << "bad content length, aborting" << dendl; + return -EINVAL; + } + } + } + + if (s->content_length < 0) { + ldout(s->cct, 10) << "negative content length, aborting" << dendl; + return -EINVAL; + } + + map::iterator giter; + for (giter = generic_attrs_map.begin(); giter != generic_attrs_map.end(); + ++giter) { + const char *env = info.env->get(giter->first.c_str()); + if (env) { + s->generic_attrs[giter->second] = env; + } + } + + if (g_conf()->rgw_print_continue) { + const char *expect = info.env->get("HTTP_EXPECT"); + s->expect_cont = (expect && !strcasecmp(expect, "100-continue")); + } + s->op = op_from_method(info.method); + + info.init_meta_info(&s->has_bad_meta); + + return 0; +} + + + + +int 
RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_rest.cc RGWREST::preprocess()",parent_span); + req_info& info = s->info; + + /* save the request uri used to hash on the client side. request_uri may suffer + modifications as part of the bucket encoding in the subdomain calling format. + request_uri_aws4 will be used under aws4 auth */ + s->info.request_uri_aws4 = s->info.request_uri; + + s->cio = cio; + + // We need to know if this RGW instance is running the s3website API with a + // higher priority than regular S3 API, or possibly in place of the regular + // S3 API. + // Map the listing of rgw_enable_apis in REVERSE order, so that items near + // the front of the list have a higher number assigned (and -1 for items not in the list). + list apis; + get_str_list(g_conf()->rgw_enable_apis, apis); + int api_priority_s3 = -1; + int api_priority_s3website = -1; + auto api_s3website_priority_rawpos = std::find(apis.begin(), apis.end(), "s3website"); + auto api_s3_priority_rawpos = std::find(apis.begin(), apis.end(), "s3"); + if (api_s3_priority_rawpos != apis.end()) { + api_priority_s3 = apis.size() - std::distance(apis.begin(), api_s3_priority_rawpos); + } + if (api_s3website_priority_rawpos != apis.end()) { + api_priority_s3website = apis.size() - std::distance(apis.begin(), api_s3website_priority_rawpos); + } + ldout(s->cct, 10) << "rgw api priority: s3=" << api_priority_s3 << " s3website=" << api_priority_s3website << dendl; + bool s3website_enabled = api_priority_s3website >= 0; + + if (info.host.size()) { + ssize_t pos; + if (info.host.find('[') == 0) { + pos = info.host.find(']'); + if (pos >=1) { + info.host = info.host.substr(1, pos-1); + } + } else { + pos = info.host.find(':'); + if (pos >= 0) { + info.host = info.host.substr(0, pos); + } + } + ldout(s->cct, 10) << "host=" << info.host << dendl; + string domain; + string subdomain; + bool 
in_hosted_domain_s3website = false; + bool in_hosted_domain = rgw_find_host_in_domains(info.host, &domain, &subdomain, hostnames_set); + + string s3website_domain; + string s3website_subdomain; + + if (s3website_enabled) { + in_hosted_domain_s3website = rgw_find_host_in_domains(info.host, &s3website_domain, &s3website_subdomain, hostnames_s3website_set); + if (in_hosted_domain_s3website) { + in_hosted_domain = true; // TODO: should hostnames be a strict superset of hostnames_s3website? + domain = s3website_domain; + subdomain = s3website_subdomain; + } + } + + ldout(s->cct, 20) + << "subdomain=" << subdomain + << " domain=" << domain + << " in_hosted_domain=" << in_hosted_domain + << " in_hosted_domain_s3website=" << in_hosted_domain_s3website << dendl; if (g_conf()->rgw_resolve_cname @@ -2112,10 +2506,10 @@ int RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio) } ldout(s->cct, 20) - << "subdomain=" << subdomain - << " domain=" << domain - << " in_hosted_domain=" << in_hosted_domain - << " in_hosted_domain_s3website=" << in_hosted_domain_s3website + << "subdomain=" << subdomain + << " domain=" << domain + << " in_hosted_domain=" << in_hosted_domain + << " in_hosted_domain_s3website=" << in_hosted_domain_s3website << dendl; } } @@ -2271,6 +2665,48 @@ int RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio) return 0; } +RGWHandler_REST* RGWREST::get_handler( + rgw::sal::RGWRadosStore * const store, + struct req_state* const s, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string& frontend_prefix, + RGWRestfulIO* const rio, + RGWRESTMgr** const pmgr, + int* const init_error, + Jager_Tracer& tracer,const Span& parent_span +) { + Span span=tracer.child_span("rgw_resr.cc RGWHandler_REST* RGWREST::get_handler()",parent_span); + *init_error = preprocess(s, rio,tracer,span); + if (*init_error < 0) { + return nullptr; + } + + RGWRESTMgr *m = mgr.get_manager(s, frontend_prefix, s->decoded_uri, + &s->relative_uri); + if (! 
m) { + *init_error = -ERR_METHOD_NOT_ALLOWED; + return nullptr; + } + + if (pmgr) { + *pmgr = m; + } + + RGWHandler_REST* handler = m->get_handler(s, auth_registry, frontend_prefix); + if (! handler) { + *init_error = -ERR_METHOD_NOT_ALLOWED; + return NULL; + } + *init_error = handler->init(store, s, rio,tracer,span); + if (*init_error < 0) { + m->put_handler(handler); + return nullptr; + } + + return handler; +} + + RGWHandler_REST* RGWREST::get_handler( rgw::sal::RGWRadosStore * const store, struct req_state* const s, diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h index ce9498453125f..e7845aa8c827d 100644 --- a/src/rgw/rgw_rest.h +++ b/src/rgw/rgw_rest.h @@ -10,6 +10,7 @@ #include "common/sstring.hh" #include "common/ceph_json.h" #include "include/ceph_assert.h" /* needed because of common/ceph_json.h */ +#include "include/tracer.h" #include "rgw_op.h" #include "rgw_formats.h" #include "rgw_client_io.h" @@ -168,6 +169,7 @@ class RGWGetObj_ObjStore : public RGWGetObj } int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; }; class RGWGetObjTags_ObjStore : public RGWGetObjTags { @@ -185,13 +187,13 @@ class RGWPutObjTags_ObjStore: public RGWPutObjTags { class RGWGetBucketTags_ObjStore : public RGWGetBucketTags { public: RGWGetBucketTags_ObjStore() = default; - virtual ~RGWGetBucketTags_ObjStore() = default; + virtual ~RGWGetBucketTags_ObjStore() = default; }; class RGWPutBucketTags_ObjStore: public RGWPutBucketTags { public: RGWPutBucketTags_ObjStore() = default; - virtual ~RGWPutBucketTags_ObjStore() = default; + virtual ~RGWPutBucketTags_ObjStore() = default; }; class RGWGetBucketReplication_ObjStore : public RGWGetBucketReplication { @@ -203,13 +205,13 @@ class RGWGetBucketReplication_ObjStore : public RGWGetBucketReplication { class RGWPutBucketReplication_ObjStore: public RGWPutBucketReplication { public: RGWPutBucketReplication_ObjStore() = default; - virtual ~RGWPutBucketReplication_ObjStore() = default; + virtual 
~RGWPutBucketReplication_ObjStore() = default; }; class RGWDeleteBucketReplication_ObjStore: public RGWDeleteBucketReplication { public: RGWDeleteBucketReplication_ObjStore() = default; - virtual ~RGWDeleteBucketReplication_ObjStore() = default; + virtual ~RGWDeleteBucketReplication_ObjStore() = default; }; class RGWListBuckets_ObjStore : public RGWListBuckets { @@ -262,7 +264,9 @@ class RGWPutObj_ObjStore : public RGWPutObj int verify_params() override; int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; int get_data(bufferlist& bl) override; + int get_data(bufferlist& bl,Jager_Tracer&,const Span&) override; }; class RGWPostObj_ObjStore : public RGWPostObj @@ -585,7 +589,9 @@ class RGWHandler_REST : public RGWHandler { static int reallocate_formatter(struct req_state *s, int type); int init_permissions(RGWOp* op) override; + int init_permissions(RGWOp* op,Jager_Tracer&,const Span&) override; int read_permissions(RGWOp* op) override; + int read_permissions(RGWOp* op,Jager_Tracer&,const Span&) override; virtual RGWOp* get_op(void); virtual void put_op(RGWOp* op); @@ -668,8 +674,16 @@ class RGWREST { RGWRESTMgr mgr; static int preprocess(struct req_state *s, rgw::io::BasicClient* rio); + static int preprocess(struct req_state *s, rgw::io::BasicClient* rio,Jager_Tracer&,const Span&); public: RGWREST() {} + RGWHandler_REST *get_handler(rgw::sal::RGWRadosStore *store, + struct req_state *s, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string& frontend_prefix, + RGWRestfulIO *rio, + RGWRESTMgr **pmgr, + int *init_error,Jager_Tracer&,const Span&); RGWHandler_REST *get_handler(rgw::sal::RGWRadosStore *store, struct req_state *s, const rgw::auth::StrategyRegistry& auth_registry, @@ -827,6 +841,7 @@ extern void list_all_buckets_end(struct req_state *s); extern void dump_time(struct req_state *s, const char *name, real_time *t); extern std::string dump_time_to_str(const real_time& t); extern void dump_bucket_from_state(struct 
req_state *s); +extern void dump_bucket_from_state(struct req_state *s, Jager_Tracer&, const Span&); extern void dump_redirect(struct req_state *s, const string& redirect); extern bool is_valid_url(const char *url); extern void dump_access_control(struct req_state *s, const char *origin, diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 532d738b58075..a33020c4533fd 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -161,12 +161,25 @@ int RGWGetObj_ObjStore_S3::get_params() return RGWGetObj_ObjStore::get_params(); } +int RGWGetObj_ObjStore_S3::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_3.cc RGWGetObj_ObjStore_S3::get_params", parent_span); + return RGWGetObj_ObjStore_S3::get_params(); +} + int RGWGetObj_ObjStore_S3::send_response_data_error() { bufferlist bl; return send_response_data(bl, 0 , 0); } +int RGWGetObj_ObjStore_S3::send_response_data_error(const Span& parent_span) +{ + parent_span->SetTag("operation_gateway" ,"s3"); + bufferlist bl; + return send_response_data(bl, 0 , 0); +} + template int decode_attr_bl_single_value(map& attrs, const char *attr_name, T *result, T def_val) { @@ -396,6 +409,12 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, return 0; } +int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs,off_t bl_len, const Span& parent_span) +{ + parent_span->SetTag("operation_gateway", "s3"); + return RGWGetObj_ObjStore_S3::send_response_data(bl, bl_ofs, bl_len); +} + int RGWGetObj_ObjStore_S3::get_decrypt_filter(std::unique_ptr *filter, RGWGetObj_Filter* cb, bufferlist* manifest_bl) { if (skip_decrypt) { // bypass decryption for multisite sync requests @@ -1192,6 +1211,12 @@ void RGWListBuckets_ObjStore_S3::send_response_begin(bool has_buckets) } } +void RGWListBuckets_ObjStore_S3::send_response_begin(bool has_buckets, Jager_Tracer& tracer, const Span& parent_span, Span& span) +{ + span = 
tracer.child_span("rgw_rest_s3.cc RGWListBuckets_ObjStore_S3::send_response_begin", parent_span); + RGWListBuckets_ObjStore_S3::send_response_begin(has_buckets); +} + void RGWListBuckets_ObjStore_S3::send_response_data(rgw::sal::RGWBucketList& buckets) { if (!sent_data) @@ -1207,6 +1232,12 @@ void RGWListBuckets_ObjStore_S3::send_response_data(rgw::sal::RGWBucketList& buc rgw_flush_formatter(s, s->formatter); } +void RGWListBuckets_ObjStore_S3::send_response_data(rgw::sal::RGWBucketList& buckets, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_s3.cc RGWListBuckets_ObjStore_S3::send_response_data", parent_span); + RGWListBuckets_ObjStore_S3::send_response_data(buckets); +} + void RGWListBuckets_ObjStore_S3::send_response_end() { if (sent_data) { @@ -1216,6 +1247,13 @@ void RGWListBuckets_ObjStore_S3::send_response_end() } } +void RGWListBuckets_ObjStore_S3::send_response_end(Span span, const Span& parent_span) +{ + parent_span->SetTag("operation_gateway", "s3"); + RGWListBuckets_ObjStore_S3::send_response_end(); + span->Finish(); +} + int RGWGetUsage_ObjStore_S3::get_params() { start_date = s->info.args.get("start-date"); @@ -1389,6 +1427,11 @@ int RGWListBucket_ObjStore_S3::get_common_params() return 0; } +int RGWListBucket_ObjStore_S3::get_common_params(Jager_Tracer& tracer, const Span& parent_span){ + Span span = tracer.child_span("rgw_rest_s3.cc RGWListBucket_ObjStore_S3::get_common_params", parent_span); + return RGWListBucket_ObjStore_S3::get_common_params(); +} + int RGWListBucket_ObjStore_S3::get_params() { int ret = get_common_params(); @@ -1404,6 +1447,22 @@ int RGWListBucket_ObjStore_S3::get_params() return 0; } +int RGWListBucket_ObjStore_S3::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_s3.cc RGWListBucket_ObjStore_S3::get_params", parent_span); + int ret = get_common_params(tracer, span); + if (ret < 0) { + return ret; + } + if (!list_versions) { + 
marker = s->info.args.get("marker"); + } else { + marker.name = s->info.args.get("key-marker"); + marker.instance = s->info.args.get("version-id-marker"); + } + return 0; +} + int RGWListBucket_ObjStore_S3v2::get_params() { int ret = get_common_params(); @@ -1621,6 +1680,12 @@ void RGWListBucket_ObjStore_S3::send_response() s->formatter->close_section(); rgw_flush_formatter_and_reset(s, s->formatter); } + +void RGWListBucket_ObjStore_S3::send_response(Jager_Tracer& tracer, const Span& parent_span){ + Span span = tracer.child_span("rgw_rest_s3.cc ",parent_span); + RGWListBucket_ObjStore_S3::send_response(); + parent_span->SetTag("operation_gateway", "s3"); +} void RGWListBucket_ObjStore_S3v2::send_versioned_response() { @@ -2212,6 +2277,11 @@ int RGWCreateBucket_ObjStore_S3::get_params() return 0; } +int RGWCreateBucket_ObjStore_S3::get_params(Jager_Tracer& tracer, const Span& parent_span){ + Span span = tracer.child_span("rgw_rest_s3.cc RGWCreateBucket_ObjStore_S3::get_params", parent_span); + return RGWCreateBucket_ObjStore_S3::get_params(); +} + void RGWCreateBucket_ObjStore_S3::send_response() { if (op_ret == -ERR_BUCKET_EXISTS) @@ -2236,6 +2306,13 @@ void RGWCreateBucket_ObjStore_S3::send_response() } } +void RGWCreateBucket_ObjStore_S3::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_s3.cc RGWCreateBucket_ObjStore_S3::send_response", parent_span); + RGWCreateBucket_ObjStore_S3::send_response(); + parent_span->SetTag("operation_gateway", "s3"); +} + void RGWDeleteBucket_ObjStore_S3::send_response() { int r = op_ret; @@ -2247,6 +2324,13 @@ void RGWDeleteBucket_ObjStore_S3::send_response() end_header(s, this); } +void RGWDeleteBucket_ObjStore_S3::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_s3.cc RGWDeleteBucket_ObjStore_S3::send_response", parent_span); + RGWDeleteBucket_ObjStore_S3::send_response(); + parent_span->SetTag("operation_gateway", 
"s3"); +} + static inline void map_qs_metadata(struct req_state* s) { /* merge S3 valid user metadata from the query-string into @@ -2445,6 +2529,192 @@ int RGWPutObj_ObjStore_S3::get_params() return RGWPutObj_ObjStore::get_params(); } +int RGWPutObj_ObjStore_S3::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_s3.cc RGWPutObj_ObjStore_S3::get_params", parent_span); + if (!s->length) + return -ERR_LENGTH_REQUIRED; + + map src_attrs; + size_t pos; + int ret; + + map_qs_metadata(s); + + RGWAccessControlPolicy_S3 s3policy(s->cct); + ret = create_s3_policy(s, store, s3policy, s->owner); + if (ret < 0) + return ret; + + policy = s3policy; + + if_match = s->info.env->get("HTTP_IF_MATCH"); + if_nomatch = s->info.env->get("HTTP_IF_NONE_MATCH"); + copy_source = url_decode(s->info.env->get("HTTP_X_AMZ_COPY_SOURCE", "")); + copy_source_range = s->info.env->get("HTTP_X_AMZ_COPY_SOURCE_RANGE"); + + /* handle x-amz-copy-source */ + boost::string_view cs_view(copy_source); + if (! 
cs_view.empty()) { + if (cs_view[0] == '/') + cs_view.remove_prefix(1); + copy_source_bucket_name = cs_view.to_string(); + pos = copy_source_bucket_name.find("/"); + if (pos == std::string::npos) { + ret = -EINVAL; + ldpp_dout(this, 5) << "x-amz-copy-source bad format" << dendl; + return ret; + } + copy_source_object_name = + copy_source_bucket_name.substr(pos + 1, copy_source_bucket_name.size()); + copy_source_bucket_name = copy_source_bucket_name.substr(0, pos); +#define VERSION_ID_STR "?versionId=" + pos = copy_source_object_name.find(VERSION_ID_STR); + if (pos == std::string::npos) { + copy_source_object_name = url_decode(copy_source_object_name); + } else { + copy_source_version_id = + copy_source_object_name.substr(pos + sizeof(VERSION_ID_STR) - 1); + copy_source_object_name = + url_decode(copy_source_object_name.substr(0, pos)); + } + pos = copy_source_bucket_name.find(":"); + if (pos == std::string::npos) { + copy_source_tenant_name = s->src_tenant_name; + } else { + copy_source_tenant_name = copy_source_bucket_name.substr(0, pos); + copy_source_bucket_name = copy_source_bucket_name.substr(pos + 1, copy_source_bucket_name.size()); + if (copy_source_bucket_name.empty()) { + ret = -EINVAL; + ldpp_dout(this, 5) << "source bucket name is empty" << dendl; + return ret; + } + } + ret = store->getRados()->get_bucket_info(store->svc(), + copy_source_tenant_name, + copy_source_bucket_name, + copy_source_bucket_info, + NULL, s->yield, &src_attrs); + if (ret < 0) { + ldpp_dout(this, 5) << __func__ << "(): get_bucket_info() returned ret=" << ret << dendl; + return ret; + } + + /* handle x-amz-copy-source-range */ + + if (copy_source_range) { + string range = copy_source_range; + pos = range.find("bytes="); + if (pos == std::string::npos || pos != 0) { + ret = -EINVAL; + ldpp_dout(this, 5) << "x-amz-copy-source-range bad format" << dendl; + return ret; + } + /* 6 is the length of "bytes=" */ + range = range.substr(pos + 6); + pos = range.find("-"); + if (pos == 
std::string::npos) { + ret = -EINVAL; + ldpp_dout(this, 5) << "x-amz-copy-source-range bad format" << dendl; + return ret; + } + string first = range.substr(0, pos); + string last = range.substr(pos + 1); + if (first.find_first_not_of("0123456789") != std::string::npos || last.find_first_not_of("0123456789") != std::string::npos) + { + ldpp_dout(this, 5) << "x-amz-copy-source-range bad format not an integer" << dendl; + ret = -EINVAL; + return ret; + } + copy_source_range_fst = strtoull(first.c_str(), NULL, 10); + copy_source_range_lst = strtoull(last.c_str(), NULL, 10); + if (copy_source_range_fst > copy_source_range_lst) + { + ret = -ERANGE; + ldpp_dout(this, 5) << "x-amz-copy-source-range bad format first number bigger than second" << dendl; + return ret; + } + } + + } /* copy_source */ + + /* handle object tagging */ + auto tag_str = s->info.env->get("HTTP_X_AMZ_TAGGING"); + if (tag_str){ + obj_tags = std::make_unique(); + ret = obj_tags->set_from_string(tag_str); + if (ret < 0){ + ldpp_dout(this,0) << "setting obj tags failed with " << ret << dendl; + if (ret == -ERR_INVALID_TAG){ + ret = -EINVAL; //s3 returns only -EINVAL for PUT requests + } + + return ret; + } + } + + //handle object lock + auto obj_lock_mode_str = s->info.env->get("HTTP_X_AMZ_OBJECT_LOCK_MODE"); + auto obj_lock_date_str = s->info.env->get("HTTP_X_AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE"); + auto obj_legal_hold_str = s->info.env->get("HTTP_X_AMZ_OBJECT_LOCK_LEGAL_HOLD"); + if (obj_lock_mode_str && obj_lock_date_str) { + boost::optional date = ceph::from_iso_8601(obj_lock_date_str); + if (boost::none == date || ceph::real_clock::to_time_t(*date) <= ceph_clock_now()) { + ret = -EINVAL; + ldpp_dout(this,0) << "invalid x-amz-object-lock-retain-until-date value" << dendl; + return ret; + } + if (strcmp(obj_lock_mode_str, "GOVERNANCE") != 0 && strcmp(obj_lock_mode_str, "COMPLIANCE") != 0) { + ret = -EINVAL; + ldpp_dout(this,0) << "invalid x-amz-object-lock-mode value" << dendl; + return ret; + } + 
obj_retention = new RGWObjectRetention(obj_lock_mode_str, *date); + } else if ((obj_lock_mode_str && !obj_lock_date_str) || (!obj_lock_mode_str && obj_lock_date_str)) { + ret = -EINVAL; + ldpp_dout(this,0) << "need both x-amz-object-lock-mode and x-amz-object-lock-retain-until-date " << dendl; + return ret; + } + if (obj_legal_hold_str) { + if (strcmp(obj_legal_hold_str, "ON") != 0 && strcmp(obj_legal_hold_str, "OFF") != 0) { + ret = -EINVAL; + ldpp_dout(this,0) << "invalid x-amz-object-lock-legal-hold value" << dendl; + return ret; + } + obj_legal_hold = new RGWObjectLegalHold(obj_legal_hold_str); + } + if (!s->bucket_info.obj_lock_enabled() && (obj_retention || obj_legal_hold)) { + ldpp_dout(this, 0) << "ERROR: object retention or legal hold can't be set if bucket object lock not configured" << dendl; + ret = -ERR_INVALID_REQUEST; + return ret; + } + multipart_upload_id = s->info.args.get("uploadId"); + multipart_part_str = s->info.args.get("partNumber"); + if (!multipart_part_str.empty()) { + string err; + multipart_part_num = strict_strtol(multipart_part_str.c_str(), 10, &err); + if (!err.empty()) { + ldpp_dout(s, 10) << "bad part number: " << multipart_part_str << ": " << err << dendl; + return -EINVAL; + } + } else if (!multipart_upload_id.empty()) { + ldpp_dout(s, 10) << "part number with no multipart upload id" << dendl; + return -EINVAL; + } + + append = s->info.args.exists("append"); + if (append) { + string pos_str = s->info.args.get("position"); + if (pos_str.empty()) { + return -EINVAL; + } else { + position = strtoull(pos_str.c_str(), NULL, 10); + } + } + + return RGWPutObj_ObjStore::get_params(tracer, span); +} + int RGWPutObj_ObjStore_S3::get_data(bufferlist& bl) { const int ret = RGWPutObj_ObjStore::get_data(bl); @@ -2524,6 +2794,13 @@ void RGWPutObj_ObjStore_S3::send_response() end_header(s, this); } +void RGWPutObj_ObjStore_S3::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_s3.cc 
RGWPutObj_ObjStore_S3::send_response", parent_span); + RGWPutObj_ObjStore_S3::send_response(); + parent_span->SetTag("opearation_gateway", "s3"); +} + static inline int get_obj_attrs(rgw::sal::RGWRadosStore *store, struct req_state *s, rgw_obj& obj, map& attrs) { RGWRados::Object op_target(store->getRados(), s->bucket_info, *static_cast(s->obj_ctx), obj); @@ -3186,6 +3463,13 @@ void RGWDeleteObj_ObjStore_S3::send_response() end_header(s, this); } +void RGWDeleteObj_ObjStore_S3::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_s3.cc RGWDeleteObj_ObjStore_S3::send_response", parent_span); + RGWDeleteObj_ObjStore_S3::send_response(); + parent_span->SetTag("operation_gateway", "s3"); +} + int RGWCopyObj_ObjStore_S3::init_dest_policy() { RGWAccessControlPolicy_S3 s3policy(s->cct); diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index 7bb2f9d260f90..929bce0b6da8a 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -47,8 +47,11 @@ class RGWGetObj_ObjStore_S3 : public RGWGetObj_ObjStore ~RGWGetObj_ObjStore_S3() override {} int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; int send_response_data_error() override; + int send_response_data_error(const Span&) override; int send_response_data(bufferlist& bl, off_t ofs, off_t len) override; + int send_response_data(bufferlist& bl, off_t ofs, off_t len, const Span&) override; void set_custom_http_response(int http_ret) { custom_http_ret = http_ret; } int get_decrypt_filter(std::unique_ptr* filter, RGWGetObj_Filter* cb, @@ -131,9 +134,17 @@ class RGWListBuckets_ObjStore_S3 : public RGWListBuckets_ObjStore { limit = -1; /* no limit */ return 0; } + int get_params(Jager_Tracer& tracer, const Span& parent_span) override { + Span span = tracer.child_span("rgw_rest_s3.h RGWListBuckets_ObjStore_S3::get_params", parent_span); + limit = -1; /* no limit */ + return 0; + } void send_response_begin(bool has_buckets) override; + 
void send_response_begin(bool has_buckets, Jager_Tracer&, const Span&, Span&) override; void send_response_data(rgw::sal::RGWBucketList& buckets) override; + void send_response_data(rgw::sal::RGWBucketList& buckets, Jager_Tracer& tracer, const Span& parent_span) override; void send_response_end() override; + void send_response_end(Span,const Span&) override; }; class RGWGetUsage_ObjStore_S3 : public RGWGetUsage_ObjStore { @@ -150,6 +161,7 @@ class RGWListBucket_ObjStore_S3 : public RGWListBucket_ObjStore { bool objs_container; bool encode_key {false}; int get_common_params(); + int get_common_params(Jager_Tracer&, const Span&); void send_common_response(); void send_common_versioned_response(); public: @@ -159,7 +171,9 @@ class RGWListBucket_ObjStore_S3 : public RGWListBucket_ObjStore { ~RGWListBucket_ObjStore_S3() override {} int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; void send_versioned_response(); }; @@ -251,7 +265,9 @@ class RGWCreateBucket_ObjStore_S3 : public RGWCreateBucket_ObjStore { ~RGWCreateBucket_ObjStore_S3() override {} int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; }; class RGWDeleteBucket_ObjStore_S3 : public RGWDeleteBucket_ObjStore { @@ -260,6 +276,7 @@ class RGWDeleteBucket_ObjStore_S3 : public RGWDeleteBucket_ObjStore { ~RGWDeleteBucket_ObjStore_S3() override {} void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; }; class RGWPutObj_ObjStore_S3 : public RGWPutObj_ObjStore { @@ -271,8 +288,10 @@ class RGWPutObj_ObjStore_S3 : public RGWPutObj_ObjStore { ~RGWPutObj_ObjStore_S3() override {} int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; int get_data(bufferlist& bl) override; void send_response() override; + void 
send_response(Jager_Tracer&, const Span&) override; int get_encrypt_filter(std::unique_ptr *filter, rgw::putobj::DataProcessor *cb) override; @@ -324,6 +343,7 @@ class RGWDeleteObj_ObjStore_S3 : public RGWDeleteObj_ObjStore { int get_params() override; void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; }; class RGWCopyObj_ObjStore_S3 : public RGWCopyObj_ObjStore { diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index fb392828a42ac..8a5ad599cbb77 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -41,6 +41,17 @@ int RGWListBuckets_ObjStore_SWIFT::get_params() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest_swift.h get_params", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_rest_swift.cc get_params"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif + prefix = s->info.args.get("prefix"); marker = s->info.args.get("marker"); end_marker = s->info.args.get("end_marker"); @@ -83,6 +94,11 @@ int RGWListBuckets_ObjStore_SWIFT::get_params() return 0; } +int RGWListBuckets_ObjStore_SWIFT::get_params(Jager_Tracer& tracer, const Span& parent_span){ + Span span = tracer.child_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::get_params", parent_span); + return RGWListBuckets_ObjStore_SWIFT::get_params(); +} + static void dump_account_metadata(struct req_state * const s, const RGWUsageStats& global_stats, const std::map &policies_stats, @@ -163,6 +179,16 @@ static void dump_account_metadata(struct req_state * const s, void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::send_response_begin", global_state->stack_span.top()); + else + span = 
tracer_2.new_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::send_response_begin"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (op_ret) { set_req_state_err(s, op_ret); } else if (!has_buckets && s->format == RGW_FORMAT_PLAIN) { @@ -192,8 +218,24 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets) } } +void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets, Jager_Tracer& tracer, const Span& parent_span, Span& span) +{ + span = tracer.child_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::send_response_begin", parent_span); + RGWListBuckets_ObjStore_SWIFT::send_response_begin(has_buckets); +} + void RGWListBuckets_ObjStore_SWIFT::handle_listing_chunk(rgw::sal::RGWBucketList&& buckets) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::handle_listing_chunk", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::handle_listing_chunk"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (wants_reversed) { /* Just store in the reversal buffer. Its content will be handled later, * in send_response_end(). */ @@ -203,8 +245,30 @@ void RGWListBuckets_ObjStore_SWIFT::handle_listing_chunk(rgw::sal::RGWBucketList } } +void RGWListBuckets_ObjStore_SWIFT::handle_listing_chunk(rgw::sal::RGWBucketList&& buckets, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::handle_listing_chunk", parent_span); + if (wants_reversed) { + /* Just store in the reversal buffer. Its content will be handled later, + * in send_response_end(). 
*/ + reverse_buffer.emplace(std::begin(reverse_buffer), std::move(buckets)); + } else { + return send_response_data(buckets, tracer, span); + } +} + void RGWListBuckets_ObjStore_SWIFT::send_response_data(rgw::sal::RGWBucketList& buckets) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::send_response_data", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::send_response_data"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (! sent_data) { return; } @@ -221,6 +285,12 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_data(rgw::sal::RGWBucketList& } } +void RGWListBuckets_ObjStore_SWIFT::send_response_data(rgw::sal::RGWBucketList& buckets, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWListBuckets_ObjStore_SWIFT::send_response_data", parent_span); + RGWListBuckets_ObjStore_SWIFT::send_response_data(buckets); +} + void RGWListBuckets_ObjStore_SWIFT::dump_bucket_entry(const rgw::sal::RGWBucket& obj) { s->formatter->open_object_section("container"); @@ -293,8 +363,25 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_end() } } +void RGWListBuckets_ObjStore_SWIFT::send_response_end(Span span, const Span& parent_span) +{ + parent_span->SetTag("operation_gateway", "swift"); + RGWListBuckets_ObjStore_SWIFT::send_response_end(); + span->Finish(); +} + int RGWListBucket_ObjStore_SWIFT::get_params() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_rest_swift.cc RGWListBucket_ObjStore_SWIFT::get_params", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_rest_swift.cc RGWListBucket_ObjStore_SWIFT::get_params"); + ss.set_req_state(global_state); + 
ss.set_span(span); + #endif prefix = s->info.args.get("prefix"); marker = s->info.args.get("marker"); end_marker = s->info.args.get("end_marker"); @@ -339,6 +426,53 @@ int RGWListBucket_ObjStore_SWIFT::get_params() return 0; } +int RGWListBucket_ObjStore_SWIFT::get_params(Jager_Tracer& tracer, const Span& parent_span){ + Span span = tracer.child_span("rgw_rest_swift.cc RGWListBucket_ObjStore_SWIFT::get_params", parent_span); + prefix = s->info.args.get("prefix"); + marker = s->info.args.get("marker"); + end_marker = s->info.args.get("end_marker"); + max_keys = s->info.args.get("limit"); + + // non-standard + s->info.args.get_bool("allow_unordered", &allow_unordered, false); + + delimiter = s->info.args.get("delimiter"); + + op_ret = parse_max_keys(tracer, span); + if (op_ret < 0) { + return op_ret; + } + // S3 behavior is to silently cap the max-keys. + // Swift behavior is to abort. + if (max > default_max) + return -ERR_PRECONDITION_FAILED; + + string path_args; + if (s->info.args.exists("path")) { // should handle empty path + path_args = s->info.args.get("path"); + if (!delimiter.empty() || !prefix.empty()) { + return -EINVAL; + } + prefix = path_args; + delimiter="/"; + + path = prefix; + if (path.size() && path[path.size() - 1] != '/') + path.append("/"); + + int len = prefix.size(); + int delim_size = delimiter.size(); + + if (len >= delim_size) { + if (prefix.substr(len - delim_size).compare(delimiter) != 0) + prefix.append(delimiter); + } + } + + return 0; + +} + static void dump_container_metadata(struct req_state *, const rgw::sal::RGWBucket*, const RGWQuotaInfo&, @@ -346,6 +480,18 @@ static void dump_container_metadata(struct req_state *, void RGWListBucket_ObjStore_SWIFT::send_response() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()){ + span = tracer_2.child_span("rgw_rest_swift.cc send_response", global_state->stack_span.top()); + 
global_state->stack_span.top()->SetTag("operation_gateway","swift"); + } + else if(global_state) + span = tracer_2.new_span("rgw_rest_swift.cc send_response"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif vector::iterator iter = objs.begin(); map::iterator pref_iter = common_prefixes.begin(); @@ -449,6 +595,12 @@ void RGWListBucket_ObjStore_SWIFT::send_response() rgw_flush_formatter_and_reset(s, s->formatter); } // RGWListBucket_ObjStore_SWIFT::send_response +void RGWListBucket_ObjStore_SWIFT::send_response(Jager_Tracer& tracer,const Span& parent_span){ + Span span = tracer.child_span("rgw_rest_swift.cc RGWListBucket_ObjStore_SWIFT::send_response", parent_span); + RGWListBucket_ObjStore_SWIFT::send_response(); + parent_span->SetTag("operation_gateway", "swift"); +} + static void dump_container_metadata(struct req_state *s, const rgw::sal::RGWBucket* bucket, const RGWQuotaInfo& quota, @@ -719,6 +871,12 @@ int RGWCreateBucket_ObjStore_SWIFT::get_params() return get_swift_versioning_settings(s, swift_ver_location); } +int RGWCreateBucket_ObjStore_SWIFT::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWCreateBucket_ObjStore_SWIFT::get_params", parent_span); + return RGWCreateBucket_ObjStore_SWIFT::get_params(); +} + static inline int handle_metadata_errors(req_state* const s, const int op_ret) { if (op_ret == -EFBIG) { @@ -760,8 +918,27 @@ void RGWCreateBucket_ObjStore_SWIFT::send_response() rgw_flush_formatter_and_reset(s, s->formatter); } +void RGWCreateBucket_ObjStore_SWIFT::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWCreateBucket_ObjStore_SWIFT::send_response", parent_span); + RGWCreateBucket_ObjStore_SWIFT::send_response(); + parent_span->SetTag("operation_gateway", "swift"); +} + void RGWDeleteBucket_ObjStore_SWIFT::send_response() { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state 
&& !global_state->stack_span.empty()){ + span = tracer_2.child_span("rgw_rest_swift.cc RGWDeleteBucket_ObjStore_SWIFT::send_response", global_state->stack_span.top()); + global_state->stack_span.top()->SetTag("operation_gateway", "swift"); + } + else if(global_state) + span = tracer_2.new_span("rgw_rest_swift.cc RGWDeleteBucket_ObjStore_SWIFT::send_response"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif int r = op_ret; if (!r) r = STATUS_NO_CONTENT; @@ -772,6 +949,13 @@ void RGWDeleteBucket_ObjStore_SWIFT::send_response() rgw_flush_formatter_and_reset(s, s->formatter); } +void RGWDeleteBucket_ObjStore_SWIFT::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWDeleteBucket_ObjStore_SWIFT::send_response", parent_span); + RGWDeleteBucket_ObjStore_SWIFT::send_response(); + parent_span->SetTag("operation_gateway", "swift"); +} + static int get_delete_at_param(req_state *s, boost::optional &delete_at) { /* Handle Swift object expiration. 
*/ @@ -810,6 +994,11 @@ static int get_delete_at_param(req_state *s, boost::optional &delete_ return 0; } +static int get_delete_at_param(req_state *s, boost::optional &delete_at, Jager_Tracer& tracer, const Span& parent_span){ + Span span = tracer.child_span("rgw_rest_swift.cc get_delete_at_param", parent_span); + return get_delete_at_param(s, delete_at); +} + int RGWPutObj_ObjStore_SWIFT::verify_permission() { op_ret = RGWPutObj_ObjStore::verify_permission(); @@ -824,6 +1013,21 @@ int RGWPutObj_ObjStore_SWIFT::verify_permission() } } +int RGWPutObj_ObjStore_SWIFT::verify_permission(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWPutObj_ObjStore_SWIFT::verify_permission", parent_span); + op_ret = RGWPutObj_ObjStore::verify_permission(tracer, parent_span); + + /* We have to differentiate error codes depending on whether user is + * anonymous (401 Unauthorized) or he doesn't have necessary permissions + * (403 Forbidden). */ + if (s->auth.identity->is_anonymous() && op_ret == -EACCES) { + return -EPERM; + } else { + return op_ret; + } +} + int RGWPutObj_ObjStore_SWIFT::update_slo_segment_size(rgw_slo_entry& entry) { int r = 0; @@ -1017,6 +1221,114 @@ int RGWPutObj_ObjStore_SWIFT::get_params() return RGWPutObj_ObjStore::get_params(); } +int RGWPutObj_ObjStore_SWIFT::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWPutObj_ObjStore_SWIFT::get_params", parent_span); + if (s->has_bad_meta) { + return -EINVAL; + } + + if (!s->length) { + const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING"); + if (!encoding || strcmp(encoding, "chunked") != 0) { + ldpp_dout(this, 20) << "neither length nor chunked encoding" << dendl; + return -ERR_LENGTH_REQUIRED; + } + + chunked_upload = true; + } + + supplied_etag = s->info.env->get("HTTP_ETAG"); + + if (!s->generic_attrs.count(RGW_ATTR_CONTENT_TYPE)) { + ldpp_dout(this, 5) << "content type wasn't 
provided, trying to guess" << dendl; + const char *suffix = strrchr(s->object.name.c_str(), '.'); + if (suffix) { + suffix++; + if (*suffix) { + string suffix_str(suffix); + const char *mime = rgw_find_mime_by_ext(suffix_str); + if (mime) { + s->generic_attrs[RGW_ATTR_CONTENT_TYPE] = mime; + } + } + } + } + + policy.create_default(s->user->get_id(), s->user->get_display_name(), tracer, span); + + int r = get_delete_at_param(s, delete_at, tracer, span); + if (r < 0) { + ldpp_dout(this, 5) << "ERROR: failed to get Delete-At param" << dendl; + return r; + } + + if (!s->cct->_conf->rgw_swift_custom_header.empty()) { + string custom_header = s->cct->_conf->rgw_swift_custom_header; + if (s->info.env->exists(custom_header.c_str())) { + user_data = s->info.env->get(custom_header.c_str()); + } + } + + dlo_manifest = s->info.env->get("HTTP_X_OBJECT_MANIFEST"); + bool exists; + string multipart_manifest = s->info.args.get("multipart-manifest", &exists); + if (exists) { + if (multipart_manifest != "put") { + ldpp_dout(this, 5) << "invalid multipart-manifest http param: " << multipart_manifest << dendl; + return -EINVAL; + } + +#define MAX_SLO_ENTRY_SIZE (1024 + 128) // 1024 - max obj name, 128 - enough extra for other info + uint64_t max_len = s->cct->_conf->rgw_max_slo_entries * MAX_SLO_ENTRY_SIZE; + + slo_info = new RGWSLOInfo; + + int r = 0; + std::tie(r, slo_info->raw_data) = rgw_rest_get_json_input_keep_data(s->cct, s, slo_info->entries, max_len); + if (r < 0) { + ldpp_dout(this, 5) << "failed to read input for slo r=" << r << dendl; + return r; + } + + if ((int64_t)slo_info->entries.size() > s->cct->_conf->rgw_max_slo_entries) { + ldpp_dout(this, 5) << "too many entries in slo request: " << slo_info->entries.size() << dendl; + return -EINVAL; + } + + MD5 etag_sum; + uint64_t total_size = 0; + for (auto& entry : slo_info->entries) { + etag_sum.Update((const unsigned char *)entry.etag.c_str(), + entry.etag.length()); + + /* if size_bytes == 0, it should be replaced with 
the + * real segment size (which could be 0); this follows from the + * fact that Swift requires all segments to exist, but permits + * the size_bytes element to be omitted from the SLO manifest, see + * https://docs.openstack.org/swift/latest/api/large_objects.html + */ + r = update_slo_segment_size(entry); + if (r < 0) { + return r; + } + + total_size += entry.size_bytes; + + ldpp_dout(this, 20) << "slo_part: " << entry.path + << " size=" << entry.size_bytes + << " etag=" << entry.etag + << dendl; + } + complete_etag(etag_sum, &lo_etag); + slo_info->total_size = total_size; + + ofs = slo_info->raw_data.length(); + } + + return RGWPutObj_ObjStore::get_params(tracer, span); +} + void RGWPutObj_ObjStore_SWIFT::send_response() { const auto meta_ret = handle_metadata_errors(s, op_ret); @@ -1050,6 +1362,13 @@ void RGWPutObj_ObjStore_SWIFT::send_response() rgw_flush_formatter_and_reset(s, s->formatter); } +void RGWPutObj_ObjStore_SWIFT::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWPutObj_ObjStore_SWIFT::send_response", parent_span); + RGWPutObj_ObjStore_SWIFT::send_response(); + parent_span->SetTag("operation_gateway", "swift"); +} + static int get_swift_account_settings(req_state * const s, rgw::sal::RGWRadosStore * const store, RGWAccessControlPolicy_SWIFTAcct * const policy, @@ -1257,6 +1576,21 @@ int RGWDeleteObj_ObjStore_SWIFT::verify_permission() } } +int RGWDeleteObj_ObjStore_SWIFT::verify_permission(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWDeleteObj_ObjStore_SWIFT::verify_permission", parent_span); + op_ret = RGWDeleteObj_ObjStore::verify_permission(); + + /* We have to differentiate error codes depending on whether user is + * anonymous (401 Unauthorized) or he doesn't have necessary permissions + * (403 Forbidden). 
*/ + if (s->auth.identity->is_anonymous() && op_ret == -EACCES) { + return -EPERM; + } else { + return op_ret; + } +} + int RGWDeleteObj_ObjStore_SWIFT::get_params() { const string& mm = s->info.args.get("multipart-manifest"); @@ -1265,6 +1599,15 @@ int RGWDeleteObj_ObjStore_SWIFT::get_params() return RGWDeleteObj_ObjStore::get_params(); } +int RGWDeleteObj_ObjStore_SWIFT::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWDeleteObj_ObjStore_SWIFT::get_params", parent_span); + const string& mm = s->info.args.get("multipart-manifest"); + multipart_delete = (mm.compare("delete") == 0); + + return RGWDeleteObj_ObjStore::get_params(); +} + void RGWDeleteObj_ObjStore_SWIFT::send_response() { int r = op_ret; @@ -1309,6 +1652,13 @@ void RGWDeleteObj_ObjStore_SWIFT::send_response() } +void RGWDeleteObj_ObjStore_SWIFT::send_response(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWDeleteObj_ObjStore_SWIFT::send_response", parent_span); + RGWDeleteObj_ObjStore_SWIFT::send_response(); + parent_span->SetTag("operation_gateway", "swift"); +} + static void get_contype_from_attrs(map& attrs, string& content_type) { @@ -1482,6 +1832,21 @@ int RGWGetObj_ObjStore_SWIFT::verify_permission() } } +int RGWGetObj_ObjStore_SWIFT::verify_permission(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWGetObj_ObjStore_SWIFT::verify_permission", parent_span); + op_ret = RGWGetObj_ObjStore::verify_permission(tracer, span); + + /* We have to differentiate error codes depending on whether user is + * anonymous (401 Unauthorized) or he doesn't have necessary permissions + * (403 Forbidden). 
*/ + if (s->auth.identity->is_anonymous() && op_ret == -EACCES) { + return -EPERM; + } else { + return op_ret; + } +} + int RGWGetObj_ObjStore_SWIFT::get_params() { const string& mm = s->info.args.get("multipart-manifest"); @@ -1490,6 +1855,15 @@ int RGWGetObj_ObjStore_SWIFT::get_params() return RGWGetObj_ObjStore::get_params(); } +int RGWGetObj_ObjStore_SWIFT::get_params(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_rest_swift.cc RGWGetObj_ObjStore_SWIFT::get_params", parent_span); + const string& mm = s->info.args.get("multipart-manifest"); + skip_manifest = (mm.compare("get") == 0); + + return RGWGetObj_ObjStore::get_params(tracer, span); +} + int RGWGetObj_ObjStore_SWIFT::send_response_data_error() { std::string error_content; @@ -1504,6 +1878,12 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data_error() return send_response_data(error_bl, 0, error_bl.length()); } +int RGWGetObj_ObjStore_SWIFT::send_response_data_error(const Span& parent_span) +{ + parent_span->SetTag("operation_gateway", "swift"); + return RGWGetObj_ObjStore_SWIFT::send_response_data_error(); +} + int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, const off_t bl_ofs, const off_t bl_len) @@ -1575,6 +1955,14 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, return 0; } +int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, + const off_t bl_ofs, + const off_t bl_len, const Span& parent_span) +{ + parent_span->SetTag("operation_gateway", "swift"); + return RGWGetObj_ObjStore_SWIFT::send_response_data(bl, bl_ofs, bl_len); +} + void RGWOptionsCORS_ObjStore_SWIFT::send_response() { string hdrs, exp_hdrs; diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h index 98e2fb144f098..49cff3daae102 100644 --- a/src/rgw/rgw_rest_swift.h +++ b/src/rgw/rgw_rest_swift.h @@ -21,9 +21,13 @@ class RGWGetObj_ObjStore_SWIFT : public RGWGetObj_ObjStore { ~RGWGetObj_ObjStore_SWIFT() override {} int verify_permission() 
override; + int verify_permission(Jager_Tracer&, const Span&) override; int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; int send_response_data_error() override; + int send_response_data_error(const Span&) override; int send_response_data(bufferlist& bl, off_t ofs, off_t len) override; + int send_response_data(bufferlist& bl, off_t ofs, off_t len, const Span&) override; void set_custom_http_response(const int http_ret) { custom_http_ret = http_ret; @@ -52,12 +56,17 @@ class RGWListBuckets_ObjStore_SWIFT : public RGWListBuckets_ObjStore { ~RGWListBuckets_ObjStore_SWIFT() override {} int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; void handle_listing_chunk(rgw::sal::RGWBucketList&& buckets) override; + void handle_listing_chunk(rgw::sal::RGWBucketList&& buckets, Jager_Tracer&, const Span&) override; void send_response_begin(bool has_buckets) override; + void send_response_begin(bool has_buckets, Jager_Tracer&, const Span&, Span&) override; void send_response_data(rgw::sal::RGWBucketList& buckets) override; + void send_response_data(rgw::sal::RGWBucketList& buckets, Jager_Tracer&, const Span&) override; void send_response_data_reversed(rgw::sal::RGWBucketList& buckets); void dump_bucket_entry(const rgw::sal::RGWBucket& obj); void send_response_end() override; + void send_response_end(Span, const Span&) override; bool should_get_stats() override { return need_stats; } bool supports_account_metadata() override { return true; } @@ -72,7 +81,9 @@ class RGWListBucket_ObjStore_SWIFT : public RGWListBucket_ObjStore { ~RGWListBucket_ObjStore_SWIFT() override {} int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; bool need_container_stats() override { return true; } }; @@ -103,7 +114,9 @@ class RGWCreateBucket_ObjStore_SWIFT : public RGWCreateBucket_ObjStore { ~RGWCreateBucket_ObjStore_SWIFT() 
override {} int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; }; class RGWDeleteBucket_ObjStore_SWIFT : public RGWDeleteBucket_ObjStore { @@ -112,6 +125,7 @@ class RGWDeleteBucket_ObjStore_SWIFT : public RGWDeleteBucket_ObjStore { ~RGWDeleteBucket_ObjStore_SWIFT() override {} void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; }; class RGWPutObj_ObjStore_SWIFT : public RGWPutObj_ObjStore { @@ -123,8 +137,11 @@ class RGWPutObj_ObjStore_SWIFT : public RGWPutObj_ObjStore { int update_slo_segment_size(rgw_slo_entry& entry); int verify_permission() override; + int verify_permission(Jager_Tracer&, const Span&) override; int get_params() override; + int get_params(Jager_Tracer&, const Span&) override; void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; }; class RGWPutMetadataAccount_ObjStore_SWIFT : public RGWPutMetadataAccount_ObjStore { @@ -161,9 +178,12 @@ class RGWDeleteObj_ObjStore_SWIFT : public RGWDeleteObj_ObjStore { ~RGWDeleteObj_ObjStore_SWIFT() override {} int verify_permission() override; + int verify_permission(Jager_Tracer&, const Span&) override; int get_params() override; + int get_params(Jager_Tracer& tracer, const Span& parent_span) override; bool need_object_expiration() override { return true; } void send_response() override; + void send_response(Jager_Tracer&, const Span&) override; }; class RGWCopyObj_ObjStore_SWIFT : public RGWCopyObj_ObjStore { diff --git a/src/rgw/rgw_sal.cc b/src/rgw/rgw_sal.cc index 22cb33fa216b7..4da1e44782d84 100644 --- a/src/rgw/rgw_sal.cc +++ b/src/rgw/rgw_sal.cc @@ -32,6 +32,16 @@ namespace rgw::sal { int RGWRadosUser::list_buckets(const string& marker, const string& end_marker, uint64_t max, bool need_stats, RGWBucketList &buckets) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && 
!global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_sal.cc RGWRadosUser::list_buckets", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_sal.cc RGWRadosUser::list_buckets"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif RGWUserBuckets ulist; bool is_truncated = false; int ret; @@ -50,6 +60,28 @@ int RGWRadosUser::list_buckets(const string& marker, const string& end_marker, return 0; } +int RGWRadosUser::list_buckets(const string& marker, const string& end_marker, + uint64_t max, bool need_stats, RGWBucketList &buckets, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_sal.cc RGWRadosUser::list_buckets", parent_span); + RGWUserBuckets ulist; + bool is_truncated = false; + int ret; + + ret = store->ctl()->user->list_buckets(info.user_id, marker, end_marker, max, + need_stats, &ulist, &is_truncated, tracer, span); + if (ret < 0) + return ret; + + buckets.set_truncated(is_truncated); + for (const auto& ent : ulist.get_buckets()) { + RGWRadosBucket *rb = new RGWRadosBucket(this->store, *this, ent.second); + buckets.add(rb); + } + + return 0; +} + RGWBucketList::~RGWBucketList() { for (auto itr = buckets.begin(); itr != buckets.end(); itr++) { @@ -182,6 +214,16 @@ int RGWRadosBucket::sync_user_stats() int RGWRadosBucket::update_container_stats(void) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_sal.cc RGWRadosBucket::update_container_stats", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("rgw_sal.cc RGWRadosBucket::update_container_stats"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif int ret; map m; @@ -204,6 +246,31 @@ int RGWRadosBucket::update_container_stats(void) return 0; } +int RGWRadosBucket::update_container_stats(Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_sal.cc 
RGWRadosBucket::update_container_stats", parent_span); + int ret; + map m; + + m[ent.bucket.name] = ent; + ret = store->getRados()->update_containers_stats(m, tracer, span); + if (!ret) + return -EEXIST; + if (ret < 0) + return ret; + + map::iterator iter = m.find(ent.bucket.name); + if (iter == m.end()) + return -EINVAL; + + ent.count = iter->second.count; + ent.size = iter->second.size; + ent.size_rounded = iter->second.size_rounded; + ent.placement_rule = std::move(iter->second.placement_rule); + + return 0; +} + int RGWRadosBucket::check_bucket_shards(void) { return store->getRados()->check_bucket_shards(info, ent.bucket, get_count()); diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index 356cbefebb6e5..2e72602b4e27e 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -102,6 +102,7 @@ class RGWBucket { virtual int read_bucket_stats(optional_yield y) = 0; virtual int sync_user_stats() = 0; virtual int update_container_stats(void) = 0; + virtual int update_container_stats(Jager_Tracer&, const Span&) = 0; virtual int check_bucket_shards(void) = 0; virtual int link(RGWUser* new_user, optional_yield y) = 0; virtual int unlink(RGWUser* new_user, optional_yield y) = 0; @@ -190,6 +191,8 @@ class RGWRadosUser : public RGWUser { int list_buckets(const string& marker, const string& end_marker, uint64_t max, bool need_stats, RGWBucketList& buckets); + int list_buckets(const string& marker, const string& end_marker, + uint64_t max, bool need_stats, RGWBucketList& buckets, Jager_Tracer&, const Span&); RGWBucket* add_bucket(rgw_bucket& bucket, ceph::real_time creation_time); /* Placeholders */ @@ -274,6 +277,7 @@ class RGWRadosBucket : public RGWBucket { virtual int read_bucket_stats(optional_yield y) override; virtual int sync_user_stats() override; virtual int update_container_stats(void) override; + virtual int update_container_stats(Jager_Tracer&, const Span&) override; virtual int check_bucket_shards(void) override; virtual int link(RGWUser* new_user, 
optional_yield y) override; virtual int unlink(RGWUser* new_user, optional_yield y) override; diff --git a/src/rgw/rgw_swift_auth.cc b/src/rgw/rgw_swift_auth.cc index 72288edf923be..8d957ab6b3055 100644 --- a/src/rgw/rgw_swift_auth.cc +++ b/src/rgw/rgw_swift_auth.cc @@ -620,6 +620,129 @@ SignedTokenEngine::authenticate(const DoutPrefixProvider* dpp, } /* namespace rgw */ +void RGW_SWIFT_Auth_Get::execute(Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_swift_auth.cc RGW_SWIFT_Auth_Get::execute()",parent_span); + int ret = -EPERM; + + const char *key = s->info.env->get("HTTP_X_AUTH_KEY"); + const char *user = s->info.env->get("HTTP_X_AUTH_USER"); + + s->prot_flags |= RGW_REST_SWIFT; + + string user_str; + RGWUserInfo info; + bufferlist bl; + RGWAccessKey *swift_key; + map::iterator siter; + + string swift_url = g_conf()->rgw_swift_url; + string swift_prefix = g_conf()->rgw_swift_url_prefix; + string tenant_path; + + /* + * We did not allow an empty Swift prefix before, but we want it now. + * So, we take rgw_swift_url_prefix = "/" to yield the empty prefix. + * The rgw_swift_url_prefix = "" is the default and yields "/swift" + * in a backwards-compatible way. 
+ */ + if (swift_prefix.size() == 0) { + swift_prefix = DEFAULT_SWIFT_PREFIX; + } else if (swift_prefix == "/") { + swift_prefix.clear(); + } else { + if (swift_prefix[0] != '/') { + swift_prefix.insert(0, "/"); + } + } + + if (swift_url.size() == 0) { + bool add_port = false; + const char *server_port = s->info.env->get("SERVER_PORT_SECURE"); + const char *protocol; + if (server_port) { + add_port = (strcmp(server_port, "443") != 0); + protocol = "https"; + } else { + server_port = s->info.env->get("SERVER_PORT"); + add_port = (server_port && strcmp(server_port, "80") != 0); /* SERVER_PORT may be absent; never strcmp a null pointer */ + protocol = "http"; + } + const char *host = s->info.env->get("HTTP_HOST"); + if (!host) { + dout(0) << "NOTICE: server is misconfigured, missing rgw_swift_url_prefix or rgw_swift_url, HTTP_HOST is not set" << dendl; + ret = -EINVAL; + goto done; + } + swift_url = protocol; + swift_url.append("://"); + swift_url.append(host); + if (add_port && !strchr(host, ':')) { + swift_url.append(":"); + swift_url.append(server_port); + } + } + + if (!key || !user) + goto done; + + user_str = user; + + if ((ret = store->ctl()->user->get_info_by_swift(user_str, &info, s->yield)) < 0) + { + ret = -EACCES; + goto done; + } + + siter = info.swift_keys.find(user_str); + if (siter == info.swift_keys.end()) { + ret = -EPERM; + goto done; + } + swift_key = &siter->second; + + if (swift_key->key.compare(key) != 0) { + dout(0) << "NOTICE: RGW_SWIFT_Auth_Get::execute(): bad swift key" << dendl; + ret = -EPERM; + goto done; + } + + if (!g_conf()->rgw_swift_tenant_name.empty()) { + tenant_path = "/AUTH_"; + tenant_path.append(g_conf()->rgw_swift_tenant_name); + } else if (g_conf()->rgw_swift_account_in_url) { + tenant_path = "/AUTH_"; + tenant_path.append(info.user_id.to_str()); + } + + dump_header(s, "X-Storage-Url", swift_url + swift_prefix + "/v1" + + tenant_path); + + using rgw::auth::swift::encode_token; + if ((ret = encode_token(s->cct, swift_key->id, swift_key->key, bl)) < 0) + goto done; + + { + static constexpr size_t 
PREFIX_LEN = sizeof("AUTH_rgwtk") - 1; + char token_val[PREFIX_LEN + bl.length() * 2 + 1]; + + snprintf(token_val, PREFIX_LEN + 1, "AUTH_rgwtk"); + buf_to_hex((const unsigned char *)bl.c_str(), bl.length(), + token_val + PREFIX_LEN); + + dump_header(s, "X-Storage-Token", token_val); + dump_header(s, "X-Auth-Token", token_val); + } + + ret = STATUS_NO_CONTENT; + +done: + set_req_state_err(s, ret); + dump_errno(s); + end_header(s); +} + + void RGW_SWIFT_Auth_Get::execute() { int ret = -EPERM; @@ -741,6 +864,18 @@ void RGW_SWIFT_Auth_Get::execute() end_header(s); } + +int RGWHandler_SWIFT_Auth::init(rgw::sal::RGWRadosStore *store, struct req_state *state, + rgw::io::BasicClient *cio,Jager_Tracer& tracer,const Span& parent_span) +{ + Span span=tracer.child_span("rgw_swift_auth.cc RGWHandler_SWIFT_Auth::init",parent_span); + state->dialect = "swift-auth"; + state->formatter = new JSONFormatter; + state->format = RGW_FORMAT_JSON; + + return RGWHandler::init(store, state, cio); +} + int RGWHandler_SWIFT_Auth::init(rgw::sal::RGWRadosStore *store, struct req_state *state, rgw::io::BasicClient *cio) { @@ -760,4 +895,3 @@ RGWOp *RGWHandler_SWIFT_Auth::op_get() { return new RGW_SWIFT_Auth_Get; } - diff --git a/src/rgw/rgw_swift_auth.h b/src/rgw/rgw_swift_auth.h index 6270ee6759956..58bbe31ad1b05 100644 --- a/src/rgw/rgw_swift_auth.h +++ b/src/rgw/rgw_swift_auth.h @@ -10,6 +10,7 @@ #include "rgw_auth_keystone.h" #include "rgw_auth_filters.h" #include "rgw_sal.h" +#include "include/tracer.h" #define RGW_SWIFT_TOKEN_EXPIRATION (15 * 60) @@ -44,7 +45,7 @@ class TempURLEngine : public rgw::auth::Engine { const TempURLApplier::Factory* const apl_factory; /* Helper methods. 
*/
-  void get_owner_info(const DoutPrefixProvider* dpp,
+  void get_owner_info(const DoutPrefixProvider* dpp,
                       const req_state* s,
                       RGWUserInfo& owner_info) const;
   std::string convert_from_iso8601(std::string expires) const;
@@ -289,6 +290,7 @@ class RGW_SWIFT_Auth_Get : public RGWOp {
   int verify_permission() override { return 0; }
   void execute() override;
+  void execute(Jager_Tracer&,const Span&) override; // traced overload: takes the tracer and the parent span to attach to
   const char* name() const override { return "swift_auth_get"; }
   dmc::client_id dmclock_client() override { return dmc::client_id::auth; }
 };
@@ -300,9 +302,14 @@ class RGWHandler_SWIFT_Auth : public RGWHandler_REST {
   RGWOp *op_get() override;
   int init(rgw::sal::RGWRadosStore *store, struct req_state *state, rgw::io::BasicClient *cio) override;
+  int init(rgw::sal::RGWRadosStore *store, struct req_state *state, rgw::io::BasicClient *cio,Jager_Tracer&,const Span&) override; // traced overload of init()
   int authorize(const DoutPrefixProvider *dpp) override;
   int postauth_init() override { return 0; }
   int read_permissions(RGWOp *op) override { return 0; }
+  int read_permissions(RGWOp *op, Jager_Tracer& tracer, const Span& parent_span) override { // traced overload: records a child span, reads nothing
+    Span span = tracer.child_span("rgw_swift_auth.cc read_permissions", parent_span);
+    return 0; // same result as the untraced overload above
+  }
   virtual RGWAccessControlPolicy *alloc_policy() { return NULL; }
   virtual void free_policy(RGWAccessControlPolicy *policy) {}
diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc
index b05fe0a0cc307..81f1a8a279675 100644
--- a/src/rgw/rgw_tools.cc
+++ b/src/rgw/rgw_tools.cc
@@ -107,6 +107,14 @@ int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool,
   return 0;
 }
+int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool,
+                   librados::IoCtx& ioctx, Jager_Tracer& tracer, const Span& parent_span, bool create,
+                   bool mostly_omap)
+{ // Traced overload: opens a child span of parent_span, then forwards to the untraced rgw_init_ioctx().
+  Span span = tracer.child_span("rgw_tools.cc rgw_init_ioctx", parent_span);
+  return rgw_init_ioctx(rados, pool, ioctx, create, mostly_omap);
+}
+
 void rgw_shard_name(const string& prefix, unsigned max_shards, const
string& key, string& name, int *shard_id) { uint32_t val = ceph_str_hash_linux(key.c_str(), key.size()); @@ -250,6 +258,16 @@ int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, librados::ObjectReadOperation *op, bufferlist* pbl, optional_yield y) { + // span_structure ss; + // #ifdef WITH_JAEGER + // Span span; + // if(global_state && !global_state->stack_span.empty()) + // span = tracer_2.child_span("rgw_tools.cc rgw_rados_operate", global_state->stack_span.top()); + // else if(global_state) + // span = tracer_2.new_span("rgw_tools.cc rgw_rados_operate"); + // ss.set_req_state(global_state); + // ss.set_span(span); + // #endif #ifdef HAVE_BOOST_CONTEXT // given a yield_context, call async_operate() to yield the coroutine instead // of blocking @@ -271,6 +289,14 @@ int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, return ioctx.operate(oid, op, nullptr); } +int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectReadOperation *op, bufferlist* pbl, + optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_tools.cc rgw_rados_operate", parent_span); + return rgw_rados_operate(ioctx, oid, op, pbl, y); +} + int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, librados::ObjectWriteOperation *op, optional_yield y) { @@ -289,6 +315,13 @@ int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, return ioctx.operate(oid, op); } +int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectWriteOperation *op, optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_tools.cc rgw_rados_operate", parent_span); + return rgw_rados_operate(ioctx, oid, op, y); +} + int rgw_rados_notify(librados::IoCtx& ioctx, const std::string& oid, bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, optional_yield y) diff --git a/src/rgw/rgw_tools.h b/src/rgw/rgw_tools.h index 
8c0065465465e..178cac26e130b 100644 --- a/src/rgw/rgw_tools.h +++ b/src/rgw/rgw_tools.h @@ -30,6 +30,10 @@ int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false, bool mostly_omap = false); +int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, + librados::IoCtx& ioctx, Jager_Tracer&, const Span&, + bool create = false, + bool mostly_omap = false); #define RGW_NO_SHARD -1 @@ -95,8 +99,13 @@ extern thread_local bool is_asio_thread; int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, librados::ObjectReadOperation *op, bufferlist* pbl, optional_yield y); +int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectReadOperation *op, bufferlist* pbl, + optional_yield y, Jager_Tracer&, const Span&); int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, librados::ObjectWriteOperation *op, optional_yield y); +int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectWriteOperation *op, optional_yield y, Jager_Tracer&, const Span&); int rgw_rados_notify(librados::IoCtx& ioctx, const std::string& oid, bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, optional_yield y); diff --git a/src/rgw/rgw_torrent.cc b/src/rgw/rgw_torrent.cc index b4501aad1fe92..24c663c980e3c 100644 --- a/src/rgw/rgw_torrent.cc +++ b/src/rgw/rgw_torrent.cc @@ -101,6 +101,12 @@ void seed::update(bufferlist &bl) sha1(&h, bl, bl.length()); } +void seed::update(bufferlist &bl, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("rgw_torrent.cc seed::update", parent_span); + seed::update(bl); +} + int seed::complete() { uint64_t remain = info.len%info.piece_length; diff --git a/src/rgw/rgw_torrent.h b/src/rgw/rgw_torrent.h index fea751485ae08..2e3d8307f25af 100644 --- a/src/rgw/rgw_torrent.h +++ b/src/rgw/rgw_torrent.h @@ -10,6 +10,7 @@ #include #include "common/ceph_time.h" +#include "include/tracer.h" #include 
"rgw_rados.h" #include "rgw_common.h" @@ -129,6 +130,7 @@ class seed void set_create_date(ceph::real_time& value); void set_info_name(const string& value); void update(bufferlist &bl); + void update(bufferlist &bl, Jager_Tracer&, const Span&); int complete(); private: diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc index ee1cded39357b..88e03dc6f52aa 100644 --- a/src/rgw/rgw_user.cc +++ b/src/rgw/rgw_user.cc @@ -2824,6 +2824,16 @@ int RGWUserCtl::list_buckets(const rgw_user& user, bool *is_truncated, uint64_t default_max) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("rgw_user.cc RGWUserCtl::list_buckets", global_state->stack_span.top()); + else + span = tracer_2.new_span("rgw_user.cc RGWUserCtl::list_buckets"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif if (!max) { max = default_max; } @@ -2846,6 +2856,20 @@ int RGWUserCtl::list_buckets(const rgw_user& user, }); } +int RGWUserCtl::list_buckets(const rgw_user& user, + const string& marker, + const string& end_marker, + uint64_t max, + bool need_stats, + RGWUserBuckets *buckets, + bool *is_truncated, + Jager_Tracer& tracer, const Span& parent_span, + uint64_t default_max) +{ + Span span = tracer.child_span("rgw_user.cc RGWUserCtl::list_buckets", parent_span); + return RGWUserCtl::list_buckets(user, marker, end_marker, max, need_stats, buckets, is_truncated, default_max); +} + int RGWUserCtl::flush_bucket_stats(const rgw_user& user, const RGWBucketEnt& ent) { diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h index 3e84230434c0c..9b298f99909d7 100644 --- a/src/rgw/rgw_user.h +++ b/src/rgw/rgw_user.h @@ -964,6 +964,16 @@ class RGWUserCtl bool *is_truncated, uint64_t default_max = 1000); + int list_buckets(const rgw_user& user, + const string& marker, + const string& end_marker, + uint64_t max, + bool need_stats, + RGWUserBuckets *buckets, + bool *is_truncated, + Jager_Tracer&, const Span&, + 
uint64_t default_max = 1000); + int flush_bucket_stats(const rgw_user& user, const RGWBucketEnt& ent); int complete_flush_stats(const rgw_user& user); diff --git a/src/rgw/services/svc_bi.h b/src/rgw/services/svc_bi.h index de348b40de450..704553b9bb5a5 100644 --- a/src/rgw/services/svc_bi.h +++ b/src/rgw/services/svc_bi.h @@ -18,6 +18,7 @@ #pragma once #include "rgw/rgw_service.h" +#include "../include/tracer.h" class RGWBucketInfo; struct RGWBucketEnt; @@ -35,6 +36,9 @@ class RGWSI_BucketIndex : public RGWServiceInstance virtual int read_stats(const RGWBucketInfo& bucket_info, RGWBucketEnt *stats, optional_yield y) = 0; + virtual int read_stats(const RGWBucketInfo& bucket_info, + RGWBucketEnt *stats, + optional_yield y, Jager_Tracer& tracer, const Span& child_span) {return 0;} virtual int handle_overwrite(const RGWBucketInfo& info, const RGWBucketInfo& orig_info) = 0; diff --git a/src/rgw/services/svc_bi_rados.cc b/src/rgw/services/svc_bi_rados.cc index 3ef0b6f09c21b..1f247495d75be 100644 --- a/src/rgw/services/svc_bi_rados.cc +++ b/src/rgw/services/svc_bi_rados.cc @@ -167,6 +167,16 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index(const RGWBucketInfo& bucket_info, map *bucket_objs, map *bucket_instance_ids) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("svc_bi_rados.cc RGWSI_BucketIndex_RADOS::open_bucket_index", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("svc_bi_rados.cc RGWSI_BucketIndex_RADOS::open_bucket_index"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif int shard_id = _shard_id.value_or(-1); string bucket_oid_base; int ret = open_bucket_index_base(bucket_info, index_pool, &bucket_oid_base); @@ -183,6 +193,17 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index(const RGWBucketInfo& bucket_info, return 0; } +int RGWSI_BucketIndex_RADOS::open_bucket_index(const RGWBucketInfo& bucket_info, + std::optional 
_shard_id, + RGWSI_RADOS::Pool *index_pool, + map *bucket_objs, + map *bucket_instance_ids, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("svc_bi_rados.cc RGWSI_BucketIndex_RADOS::open_bucket_index", parent_span); + return RGWSI_BucketIndex_RADOS::open_bucket_index(bucket_info, _shard_id, index_pool, bucket_objs, bucket_instance_ids); +} + + void RGWSI_BucketIndex_RADOS::get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards, int shard_id, @@ -353,6 +374,16 @@ int RGWSI_BucketIndex_RADOS::read_stats(const RGWBucketInfo& bucket_info, RGWBucketEnt *result, optional_yield y) { + span_structure ss; + #ifdef WITH_JAEGER + Span span; + if(global_state && !global_state->stack_span.empty()) + span = tracer_2.child_span("svc_bi_rados.cc RGWSI_BucketIndex_RADOS::read_stats", global_state->stack_span.top()); + else if(global_state) + span = tracer_2.new_span("svc_bi_rados.cc RGWSI_BucketIndex_RADOS::read_stats"); + ss.set_req_state(global_state); + ss.set_span(span); + #endif vector headers; result->bucket = bucket_info.bucket; @@ -378,6 +409,14 @@ int RGWSI_BucketIndex_RADOS::read_stats(const RGWBucketInfo& bucket_info, return 0; } +int RGWSI_BucketIndex_RADOS::read_stats(const RGWBucketInfo& bucket_info, + RGWBucketEnt *result, + optional_yield y, Jager_Tracer& tracer, const Span& parent_span) +{ + Span span = tracer.child_span("svc_bi_rados.cc RGWSI_BucketIndex_RADOS::read_stats", parent_span); + return RGWSI_BucketIndex_RADOS::read_stats(bucket_info, result, y); +} + int RGWSI_BucketIndex_RADOS::get_reshard_status(const RGWBucketInfo& bucket_info, list *status) { map bucket_objs; diff --git a/src/rgw/services/svc_bi_rados.h b/src/rgw/services/svc_bi_rados.h index d59d3606a96ff..d51463190120b 100644 --- a/src/rgw/services/svc_bi_rados.h +++ b/src/rgw/services/svc_bi_rados.h @@ -19,6 +19,7 @@ #include "rgw/rgw_service.h" #include "rgw/rgw_tools.h" +#include "../include/tracer.h" #include "svc_bi.h" #include 
"svc_rados.h" @@ -101,6 +102,9 @@ class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex int read_stats(const RGWBucketInfo& bucket_info, RGWBucketEnt *stats, optional_yield y) override; + int read_stats(const RGWBucketInfo& bucket_info, + RGWBucketEnt *stats, + optional_yield y, Jager_Tracer&, const Span&) override; int get_reshard_status(const RGWBucketInfo& bucket_info, std::list *status); @@ -126,6 +130,12 @@ class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex RGWSI_RADOS::Pool *index_pool, map *bucket_objs, map *bucket_instance_ids); + + int open_bucket_index(const RGWBucketInfo& bucket_info, + std::optional shard_id, + RGWSI_RADOS::Pool *index_pool, + map *bucket_objs, + map *bucket_instance_ids, Jager_Tracer&, const Span&); }; diff --git a/src/rgw/services/svc_rados.cc b/src/rgw/services/svc_rados.cc index 3f58e7c806439..d45c3f06d0df7 100644 --- a/src/rgw/services/svc_rados.cc +++ b/src/rgw/services/svc_rados.cc @@ -61,6 +61,16 @@ int RGWSI_RADOS::open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx, params.mostly_omap); } +int RGWSI_RADOS::open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx, + Jager_Tracer& tracer, const Span& parent_span, + const OpenParams& params) +{ + Span span = tracer.child_span("svc_rados.cc RGWSI_RADOS::open_pool_ctx", parent_span); + return rgw_init_ioctx(get_rados_handle(), pool, io_ctx, tracer, span, + params.create, + params.mostly_omap); +} + int RGWSI_RADOS::pool_iterate(librados::IoCtx& io_ctx, librados::NObjectIterator& iter, uint32_t num, vector& objs, @@ -289,6 +299,12 @@ int RGWSI_RADOS::Pool::open(const OpenParams& params) return rados_svc->open_pool_ctx(pool, state.ioctx, params); } +int RGWSI_RADOS::Pool::open( Jager_Tracer& tracer, const Span& parent_span, const OpenParams& params) +{ + Span span = tracer.child_span("svc_rados.cc RGWSI_RADOS::Pool::open", parent_span); + return rados_svc->open_pool_ctx(pool, state.ioctx, tracer, span, params); +} + int RGWSI_RADOS::Pool::List::init(const string& 
marker, RGWAccessListFilter *filter)
 {
   if (ctx.initialized) {
diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h
index 85c593a36d280..8c1b2497090d9 100644
--- a/src/rgw/services/svc_rados.h
+++ b/src/rgw/services/svc_rados.h
@@ -54,6 +54,9 @@ class RGWSI_RADOS : public RGWServiceInstance
   librados::Rados* get_rados_handle();
   int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx,
                     const OpenParams& params = {});
+  int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx, // traced overload: tracer and parent span precede params
+                    Jager_Tracer&, const Span&,
+                    const OpenParams& params = {});
   int pool_iterate(librados::IoCtx& ioctx,
                    librados::NObjectIterator& iter,
                    uint32_t num, vector& objs,
@@ -99,6 +102,7 @@ class RGWSI_RADOS : public RGWServiceInstance
     int create(const std::vector& pools, std::vector *retcodes);
     int lookup();
     int open(const OpenParams& params = {});
+    int open(Jager_Tracer&, const Span&, const OpenParams& params = {}); // traced overload of open()
     const rgw_pool& get_pool() {
       return pool;
diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc
index df5386930512b..82c77dbc6f4ec 100644
--- a/src/rgw/services/svc_zone.cc
+++ b/src/rgw/services/svc_zone.cc
@@ -1128,6 +1128,15 @@ int RGWSI_Zone::select_bucket_placement(const RGWUserInfo& user_info, const stri
   return 0;
 }
+int RGWSI_Zone::select_bucket_placement(const RGWUserInfo& user_info, const string& zonegroup_id,
+                                        const rgw_placement_rule& placement_rule,
+                                        rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info, Jager_Tracer& tracer, const Span& parent_span)
+{ // Traced overload: opens a child span of parent_span, then forwards to the untraced select_bucket_placement().
+  Span span = tracer.child_span("RGWSI_Zone::select_bucket_placement", parent_span); // NOTE(review): span name lacks the "svc_zone.cc " file prefix that other span names in this patch carry - confirm intended
+  return RGWSI_Zone::select_bucket_placement(user_info, zonegroup_id, placement_rule, pselected_rule, rule_info);
+}
+
+
 int RGWSI_Zone::select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info)
 {
   bufferlist map_bl;
diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h
index 982ab8a8d27e3..9e26548b0e66c 100644
--- a/src/rgw/services/svc_zone.h
+++
b/src/rgw/services/svc_zone.h
@@ -127,6 +127,9 @@ class RGWSI_Zone : public RGWServiceInstance
   int select_bucket_placement(const RGWUserInfo& user_info, const string& zonegroup_id,
                               const rgw_placement_rule& rule,
                               rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info);
+  int select_bucket_placement(const RGWUserInfo& user_info, const string& zonegroup_id, // traced overload: same parameters plus the tracer and the parent span
+                              const rgw_placement_rule& rule,
+                              rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info, Jager_Tracer&, const Span&);
   int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
   int select_new_bucket_location(const RGWUserInfo& user_info, const string& zonegroup_id,
                                  const rgw_placement_rule& rule,
diff --git a/src/tracerConfig.yaml b/src/tracerConfig.yaml
new file mode 100644
index 0000000000000..e69de29bb2d1d