diff options
Diffstat (limited to 'src/rgw')
90 files changed, 6648 insertions, 1746 deletions
diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 583b3531cdb..591d136ccae 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -25,6 +25,19 @@ add_custom_target(civetweb_h "${CMAKE_BINARY_DIR}/src/include/civetweb" COMMENT "keep civetweb.h up-to-date") +find_program(GPERF gperf) +if(NOT GPERF) + message(FATAL_ERROR "Can't find gperf") +endif() +function(gperf_generate input output) + add_custom_command( + OUTPUT ${output} + COMMAND ${GPERF} ${input} > ${output} + DEPENDS ${input} + COMMENT "Generate ${output}" + ) +endfunction() + set(rgw_a_srcs rgw_acl.cc rgw_acl_s3.cc @@ -103,7 +116,15 @@ set(rgw_a_srcs rgw_xml_enc.cc rgw_torrent.cc rgw_crypt.cc - rgw_crypt_sanitize.cc) + rgw_crypt_sanitize.cc + rgw_iam_policy.cc) + +gperf_generate(${CMAKE_SOURCE_DIR}/src/rgw/rgw_iam_policy_keywords.gperf + rgw_iam_policy_keywords.frag.cc) +set_source_files_properties(rgw_iam_policy.cc PROPERTIES + OBJECT_DEPENDS ${CMAKE_BINARY_DIR}/src/rgw/rgw_iam_policy_keywords.frag.cc + COMPILE_FLAGS -I${CMAKE_BINARY_DIR}/src/rgw) + if (WITH_RADOSGW_FCGI_FRONTEND) list(APPEND rgw_a_srcs rgw_fcgi.cc) @@ -115,12 +136,15 @@ add_dependencies(rgw_a civetweb_h) target_include_directories(rgw_a PUBLIC "../Beast/include" - ${FCGI_INCLUDE_DIR}) + ${FCGI_INCLUDE_DIR} + "../rapidjson/include" + ) +target_compile_definitions(rgw_a PUBLIC BOOST_COROUTINES_NO_DEPRECATION_WARNING) target_link_libraries(rgw_a librados cls_lock_client cls_rgw_client cls_refcount_client cls_log_client cls_statelog_client cls_timeindex_client cls_version_client cls_replica_log_client cls_user_client ceph-common common_utf8 global - ${CURL_LIBRARIES} + ${CURL_LIBRARIES} ${Boost_LIBRARIES} ${EXPAT_LIBRARIES} ${OPENLDAP_LIBRARIES} ${CRYPTO_LIBS}) @@ -134,15 +158,15 @@ if (WITH_RADOSGW_FCGI_FRONTEND) list(APPEND radosgw_srcs rgw_fcgi_process.cc) endif() -if (WITH_RADOSGW_ASIO_FRONTEND) +if (WITH_RADOSGW_BEAST_FRONTEND) list(APPEND radosgw_srcs rgw_asio_client.cc rgw_asio_frontend.cc) -endif 
(WITH_RADOSGW_ASIO_FRONTEND) +endif (WITH_RADOSGW_BEAST_FRONTEND) add_library(radosgw_a STATIC ${radosgw_srcs} $<TARGET_OBJECTS:civetweb_common_objs>) -target_link_libraries(radosgw_a rgw_a) +target_link_libraries(radosgw_a rgw_a ${SSL_LIBRARIES}) add_executable(radosgw rgw_main.cc) target_link_libraries(radosgw radosgw_a librados @@ -150,7 +174,7 @@ target_link_libraries(radosgw radosgw_a librados cls_log_client cls_statelog_client cls_timeindex_client cls_version_client cls_replica_log_client cls_user_client global ${FCGI_LIBRARY} ${LIB_RESOLV} - ${CURL_LIBRARIES} ${EXPAT_LIBRARIES} ${SSL_LIBRARIES} ${BLKID_LIBRARIES} + ${CURL_LIBRARIES} ${EXPAT_LIBRARIES} ${BLKID_LIBRARIES} ${ALLOC_LIBS}) # radosgw depends on cls libraries at runtime, but not as link dependencies add_dependencies(radosgw cls_rgw cls_lock cls_refcount diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h index 24567677e8e..26c84d121a2 100644 --- a/src/rgw/rgw_acl.h +++ b/src/rgw/rgw_acl.h @@ -15,8 +15,6 @@ #include "rgw_basic_types.h" -using namespace std; - #define RGW_PERM_NONE 0x00 #define RGW_PERM_READ 0x01 #define RGW_PERM_WRITE 0x02 diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h index 3a52735e091..6991a0b8363 100644 --- a/src/rgw/rgw_acl_s3.h +++ b/src/rgw/rgw_acl_s3.h @@ -13,9 +13,6 @@ #include "rgw_xml.h" #include "rgw_acl.h" - -using namespace std; - class RGWRados; class ACLPermission_S3 : public ACLPermission, public XMLObj diff --git a/src/rgw/rgw_acl_swift.cc b/src/rgw/rgw_acl_swift.cc index 78b60124ba6..339b93bbe06 100644 --- a/src/rgw/rgw_acl_swift.cc +++ b/src/rgw/rgw_acl_swift.cc @@ -179,11 +179,13 @@ int RGWAccessControlPolicy_SWIFT::create(RGWRados* const store, const rgw_user& id, const std::string& name, const std::string& read_list, - const std::string& write_list) + const std::string& write_list, + uint32_t& rw_mask) { acl.create_default(id, name); owner.set_id(id); owner.set_name(name); + rw_mask = 0; if (read_list.size()) { std::vector<std::string> uids; @@ -200,6 
+202,7 @@ int RGWAccessControlPolicy_SWIFT::create(RGWRados* const store, << r << dendl; return r; } + rw_mask |= SWIFT_PERM_READ; } if (write_list.size()) { std::vector<std::string> uids; @@ -216,10 +219,45 @@ int RGWAccessControlPolicy_SWIFT::create(RGWRados* const store, << r << dendl; return r; } + rw_mask |= SWIFT_PERM_WRITE; } return 0; } +void RGWAccessControlPolicy_SWIFT::filter_merge(uint32_t rw_mask, + RGWAccessControlPolicy_SWIFT *old) +{ + /* rw_mask&SWIFT_PERM_READ => setting read acl, + * rw_mask&SWIFT_PERM_WRITE => setting write acl + * when bit is cleared, copy matching elements from old. + */ + if (rw_mask == (SWIFT_PERM_READ|SWIFT_PERM_WRITE)) { + return; + } + rw_mask ^= (SWIFT_PERM_READ|SWIFT_PERM_WRITE); + for (auto &iter: old->acl.get_grant_map()) { + ACLGrant& grant = iter.second; + uint32_t perm = grant.get_permission().get_permissions(); + rgw_user id; + string url_spec; + if (!grant.get_id(id)) { + if (grant.get_group() != ACL_GROUP_ALL_USERS) { + url_spec = grant.get_referer(); + if (url_spec.empty()) { + continue; + } + if (perm == 0) { + /* We need to carry also negative, HTTP referrer-based ACLs. 
*/ + perm = SWIFT_PERM_READ; + } + } + } + if (perm & rw_mask) { + acl.add_grant(&grant); + } + } +} + void RGWAccessControlPolicy_SWIFT::to_str(string& read, string& write) { multimap<string, ACLGrant>& m = acl.get_grant_map(); diff --git a/src/rgw/rgw_acl_swift.h b/src/rgw/rgw_acl_swift.h index b74a85ecfe1..883b623af18 100644 --- a/src/rgw/rgw_acl_swift.h +++ b/src/rgw/rgw_acl_swift.h @@ -27,7 +27,9 @@ public: const rgw_user& id, const std::string& name, const std::string& read_list, - const std::string& write_list); + const std::string& write_list, + uint32_t& rw_mask); + void filter_merge(uint32_t mask, RGWAccessControlPolicy_SWIFT *policy); void to_str(std::string& read, std::string& write); }; diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 46a9d3f933c..69ff573fcde 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -421,6 +421,7 @@ enum { OPT_METADATA_SYNC_INIT, OPT_METADATA_SYNC_RUN, OPT_MDLOG_LIST, + OPT_MDLOG_AUTOTRIM, OPT_MDLOG_TRIM, OPT_MDLOG_FETCH, OPT_MDLOG_STATUS, @@ -819,6 +820,8 @@ static int get_cmd(const char *cmd, const char *prev_cmd, const char *prev_prev_ } else if (strcmp(prev_cmd, "mdlog") == 0) { if (strcmp(cmd, "list") == 0) return OPT_MDLOG_LIST; + if (strcmp(cmd, "autotrim") == 0) + return OPT_MDLOG_AUTOTRIM; if (strcmp(cmd, "trim") == 0) return OPT_MDLOG_TRIM; if (strcmp(cmd, "fetch") == 0) @@ -1511,32 +1514,46 @@ int do_check_object_locator(const string& tenant_name, const string& bucket_name return 0; } -#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response -static int send_to_remote_gateway(const string& remote, req_info& info, - bufferlist& in_data, JSONParser& parser) +/// search for a matching zone/zonegroup id and return a connection if found +static boost::optional<RGWRESTConn> get_remote_conn(RGWRados *store, + const RGWZoneGroup& zonegroup, + const std::string& remote) { - bufferlist response; - RGWRESTConn *conn; - if (remote.empty()) { - if (!store->rest_master_conn) { - cerr 
<< "Invalid rest master connection" << std::endl; - return -EINVAL; - } - conn = store->rest_master_conn; + boost::optional<RGWRESTConn> conn; + if (remote == zonegroup.get_id()) { + conn.emplace(store->ctx(), store, remote, zonegroup.endpoints); } else { - // check zonegroups - auto iter = store->zonegroup_conn_map.find(remote); - if (iter == store->zonegroup_conn_map.end()) { - // check zones - iter = store->zone_conn_map.find(remote); - if (iter == store->zone_conn_map.end()) { - cerr << "could not find connection for zone or zonegroup id: " - << remote << std::endl; - return -ENOENT; + for (const auto& z : zonegroup.zones) { + const auto& zone = z.second; + if (remote == zone.id) { + conn.emplace(store->ctx(), store, remote, zone.endpoints); + break; } } - conn = iter->second; } + return conn; +} + +/// search each zonegroup for a connection +static boost::optional<RGWRESTConn> get_remote_conn(RGWRados *store, + const RGWPeriodMap& period_map, + const std::string& remote) +{ + boost::optional<RGWRESTConn> conn; + for (const auto& zg : period_map.zonegroups) { + conn = get_remote_conn(store, zg.second, remote); + if (conn) { + break; + } + } + return conn; +} + +#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response +static int send_to_remote_gateway(RGWRESTConn* conn, req_info& info, + bufferlist& in_data, JSONParser& parser) +{ + bufferlist response; rgw_user user; int ret = conn->forward(user, info, NULL, MAX_REST_RESPONSE, &in_data, &response); @@ -1574,20 +1591,21 @@ static int send_to_url(const string& url, const string& access, return ret; } -static int send_to_remote_or_url(const string& remote, const string& url, +static int send_to_remote_or_url(RGWRESTConn *conn, const string& url, const string& access, const string& secret, req_info& info, bufferlist& in_data, JSONParser& parser) { if (url.empty()) { - return send_to_remote_gateway(remote, info, in_data, parser); + return send_to_remote_gateway(conn, info, in_data, parser); } 
return send_to_url(url, access, secret, info, in_data, parser); } static int commit_period(RGWRealm& realm, RGWPeriod& period, string remote, const string& url, - const string& access, const string& secret) + const string& access, const string& secret, + bool force) { const string& master_zone = period.get_master_zone(); if (master_zone.empty()) { @@ -1605,7 +1623,7 @@ static int commit_period(RGWRealm& realm, RGWPeriod& period, return ret; } // the master zone can commit locally - ret = period.commit(realm, current_period, cerr); + ret = period.commit(realm, current_period, cerr, force); if (ret < 0) { cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl; } @@ -1617,6 +1635,17 @@ static int commit_period(RGWRealm& realm, RGWPeriod& period, remote = master_zone; cout << "Sending period to new master zone " << remote << std::endl; } + boost::optional<RGWRESTConn> conn; + RGWRESTConn *remote_conn = nullptr; + if (!remote.empty()) { + conn = get_remote_conn(store, period.get_map(), remote); + if (!conn) { + cerr << "failed to find a zone or zonegroup for remote " + << remote << std::endl; + return -ENOENT; + } + remote_conn = &*conn; + } // push period to the master with an empty period id period.set_id(""); @@ -1633,7 +1662,7 @@ static int commit_period(RGWRealm& realm, RGWPeriod& period, jf.flush(bl); JSONParser p; - int ret = send_to_remote_or_url(remote, url, access, secret, info, bl, p); + int ret = send_to_remote_or_url(remote_conn, url, access, secret, info, bl, p); if (ret < 0) { cerr << "request failed: " << cpp_strerror(-ret) << std::endl; @@ -1682,7 +1711,7 @@ static int update_period(const string& realm_id, const string& realm_name, const string& period_id, const string& period_epoch, bool commit, const string& remote, const string& url, const string& access, const string& secret, - Formatter *formatter) + Formatter *formatter, bool force) { RGWRealm realm(realm_id, realm_name); int ret = realm.init(g_ceph_context, store); @@ -1713,7 
+1742,7 @@ static int update_period(const string& realm_id, const string& realm_name, return ret; } if (commit) { - ret = commit_period(realm, period, remote, url, access, secret); + ret = commit_period(realm, period, remote, url, access, secret, force); if (ret < 0) { cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl; return ret; @@ -1739,8 +1768,10 @@ static int init_bucket_for_sync(const string& tenant, const string& bucket_name, return 0; } -static int do_period_pull(const string& remote, const string& url, const string& access_key, const string& secret_key, - const string& realm_id, const string& realm_name, const string& period_id, const string& period_epoch, +static int do_period_pull(RGWRESTConn *remote_conn, const string& url, + const string& access_key, const string& secret_key, + const string& realm_id, const string& realm_name, + const string& period_id, const string& period_epoch, RGWPeriod *period) { RGWEnv env; @@ -1760,7 +1791,7 @@ static int do_period_pull(const string& remote, const string& url, const string& bufferlist bl; JSONParser p; - int ret = send_to_remote_or_url(remote, url, access_key, secret_key, + int ret = send_to_remote_or_url(remote_conn, url, access_key, secret_key, info, bl, p); if (ret < 0) { cerr << "request failed: " << cpp_strerror(-ret) << std::endl; @@ -1826,14 +1857,13 @@ static void get_md_sync_status(list<string>& status) return; } - ret = sync.read_sync_status(); + rgw_meta_sync_status sync_status; + ret = sync.read_sync_status(&sync_status); if (ret < 0) { status.push_back(string("failed to read sync status: ") + cpp_strerror(-ret)); return; } - const rgw_meta_sync_status& sync_status = sync.get_sync_status(); - string status_str; switch (sync_status.sync_info.state) { case rgw_meta_sync_info::StateInit: @@ -1912,7 +1942,8 @@ static void get_md_sync_status(list<string>& status) continue; } auto master_marker = iter->second.marker; - if (master_marker > local_iter.second.marker) { + if 
(local_iter.second.state == rgw_meta_sync_marker::SyncState::IncrementalSync && + master_marker > local_iter.second.marker) { shards_behind[shard_id] = local_iter.second.marker; } } @@ -2059,7 +2090,8 @@ static void get_data_sync_status(const string& source_zone, list<string>& status continue; } auto master_marker = iter->second.marker; - if (master_marker > local_iter.second.marker) { + if (local_iter.second.state == rgw_data_sync_marker::SyncState::IncrementalSync && + master_marker > local_iter.second.marker) { shards_behind[shard_id] = local_iter.second.marker; } } @@ -2823,15 +2855,6 @@ int main(int argc, const char **argv) ++i; } } - if (tenant.empty()) { - tenant = user_id.tenant; - } else { - if (user_id.empty()) { - cerr << "ERROR: --tenant is set, but there's no user ID" << std::endl; - return EINVAL; - } - user_id.tenant = tenant; - } if (args.empty()) { return usage(); @@ -2871,6 +2894,23 @@ int main(int argc, const char **argv) } } + if (tenant.empty()) { + tenant = user_id.tenant; + } else { + if (user_id.empty() && opt_cmd != OPT_ROLE_CREATE + && opt_cmd != OPT_ROLE_DELETE + && opt_cmd != OPT_ROLE_GET + && opt_cmd != OPT_ROLE_MODIFY + && opt_cmd != OPT_ROLE_LIST + && opt_cmd != OPT_ROLE_POLICY_PUT + && opt_cmd != OPT_ROLE_POLICY_LIST + && opt_cmd != OPT_ROLE_POLICY_GET + && opt_cmd != OPT_ROLE_POLICY_DELETE) { + cerr << "ERROR: --tenant is set, but there's no user ID" << std::endl; + return EINVAL; + } + user_id.tenant = tenant; + } /* check key parameter conflict */ if ((!access_key.empty()) && gen_access_key) { cerr << "ERROR: key parameter conflict, --access-key & --gen-access-key" << std::endl; @@ -2905,9 +2945,6 @@ int main(int argc, const char **argv) // not a raw op if 'period update' needs to commit to master bool raw_period_update = opt_cmd == OPT_PERIOD_UPDATE && !commit; - // not a raw op if 'period pull' needs to look up remotes - bool raw_period_pull = opt_cmd == OPT_PERIOD_PULL && remote.empty() && !url.empty(); - std::set<int> 
raw_storage_ops_list = {OPT_ZONEGROUP_ADD, OPT_ZONEGROUP_CREATE, OPT_ZONEGROUP_DELETE, OPT_ZONEGROUP_GET, OPT_ZONEGROUP_LIST, OPT_ZONEGROUP_SET, OPT_ZONEGROUP_DEFAULT, @@ -2923,6 +2960,7 @@ int main(int argc, const char **argv) OPT_ZONE_PLACEMENT_MODIFY, OPT_ZONE_PLACEMENT_LIST, OPT_REALM_CREATE, OPT_PERIOD_DELETE, OPT_PERIOD_GET, + OPT_PERIOD_PULL, OPT_PERIOD_GET_CURRENT, OPT_PERIOD_LIST, OPT_GLOBAL_QUOTA_GET, OPT_GLOBAL_QUOTA_SET, OPT_GLOBAL_QUOTA_ENABLE, OPT_GLOBAL_QUOTA_DISABLE, @@ -2934,7 +2972,7 @@ int main(int argc, const char **argv) bool raw_storage_op = (raw_storage_ops_list.find(opt_cmd) != raw_storage_ops_list.end() || - raw_period_update || raw_period_pull); + raw_period_update); if (raw_storage_op) { store = RGWStoreManager::get_raw_storage(g_ceph_context); @@ -3040,20 +3078,45 @@ int main(int argc, const char **argv) { int ret = update_period(realm_id, realm_name, period_id, period_epoch, commit, remote, url, access_key, secret_key, - formatter); + formatter, yes_i_really_mean_it); if (ret < 0) { return -ret; } } break; - case OPT_PERIOD_PULL: // period pull --url + case OPT_PERIOD_PULL: { + boost::optional<RGWRESTConn> conn; + RGWRESTConn *remote_conn = nullptr; if (url.empty()) { - cerr << "A --url or --remote must be provided." 
<< std::endl; - return EINVAL; + // load current period for endpoints + RGWRealm realm(realm_id, realm_name); + int ret = realm.init(g_ceph_context, store); + if (ret < 0) { + cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl; + return -ret; + } + RGWPeriod current_period(realm.get_current_period()); + ret = current_period.init(g_ceph_context, store); + if (ret < 0) { + cerr << "failed to init current period: " << cpp_strerror(-ret) << std::endl; + return -ret; + } + if (remote.empty()) { + // use realm master zone as remote + remote = current_period.get_master_zone(); + } + conn = get_remote_conn(store, current_period.get_map(), remote); + if (!conn) { + cerr << "failed to find a zone or zonegroup for remote " + << remote << std::endl; + return -ENOENT; + } + remote_conn = &*conn; } + RGWPeriod period; - int ret = do_period_pull(remote, url, access_key, secret_key, + int ret = do_period_pull(remote_conn, url, access_key, secret_key, realm_id, realm_name, period_id, period_epoch, &period); if (ret < 0) { @@ -3399,7 +3462,7 @@ int main(int argc, const char **argv) auto& current_period = realm.get_current_period(); if (!current_period.empty()) { // pull the latest epoch of the realm's current period - ret = do_period_pull(remote, url, access_key, secret_key, + ret = do_period_pull(nullptr, url, access_key, secret_key, realm_id, realm_name, current_period, "", &period); if (ret < 0) { @@ -3642,7 +3705,6 @@ int main(int argc, const char **argv) } if (need_update) { - zonegroup.post_process_params(); ret = zonegroup.update(); if (ret < 0) { cerr << "failed to update zonegroup: " << cpp_strerror(-ret) << std::endl; @@ -4533,7 +4595,7 @@ int main(int argc, const char **argv) jf.flush(bl); JSONParser p; - ret = send_to_remote_or_url(remote, url, access_key, secret_key, + ret = send_to_remote_or_url(nullptr, url, access_key, secret_key, info, bl, p); if (ret < 0) { cerr << "request failed: " << cpp_strerror(-ret) << std::endl; @@ -4541,43 +4603,11 @@ int 
main(int argc, const char **argv) } } return 0; - case OPT_PERIOD_PULL: // period pull --remote - { - if (remote.empty()) { - /* use realm master zonegroup as remote */ - RGWRealm realm(realm_id, realm_name); - int ret = realm.init(g_ceph_context, store); - if (ret < 0) { - cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl; - return -ret; - } - RGWPeriod current_period(realm.get_current_period()); - ret = current_period.init(g_ceph_context, store); - if (ret < 0) { - cerr << "failed to init current period: " << cpp_strerror(-ret) << std::endl; - return -ret; - } - remote = current_period.get_master_zonegroup(); - } - RGWPeriod period; - int ret = do_period_pull(remote, url, access_key, secret_key, - realm_id, realm_name, period_id, period_epoch, - &period); - if (ret < 0) { - cerr << "period pull failed: " << cpp_strerror(-ret) << std::endl; - return -ret; - } - - encode_json("period", period, formatter); - formatter->flush(cout); - cout << std::endl; - } - return 0; case OPT_PERIOD_UPDATE: { int ret = update_period(realm_id, realm_name, period_id, period_epoch, commit, remote, url, access_key, secret_key, - formatter); + formatter, yes_i_really_mean_it); if (ret < 0) { return -ret; } @@ -4598,7 +4628,8 @@ int main(int argc, const char **argv) cerr << "period init failed: " << cpp_strerror(-ret) << std::endl; return -ret; } - ret = commit_period(realm, period, remote, url, access_key, secret_key); + ret = commit_period(realm, period, remote, url, access_key, secret_key, + yes_i_really_mean_it); if (ret < 0) { cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl; return -ret; @@ -4611,10 +4642,13 @@ int main(int argc, const char **argv) return 0; case OPT_ROLE_CREATE: { - string uid; - user_id.to_str(uid); - if (role_name.empty() || assume_role_doc.empty() || uid.empty()) { - cerr << "ERROR: one of role name or assume role policy document or uid is empty" << std::endl; + if (role_name.empty()) { + cerr << "ERROR: role name is 
empty" << std::endl; + return -EINVAL; + } + + if (assume_role_doc.empty()) { + cerr << "ERROR: assume role policy document is empty" << std::endl; return -EINVAL; } /* The following two calls will be replaced by read_decode_json or something @@ -4631,7 +4665,7 @@ int main(int argc, const char **argv) return -EINVAL; } string trust_policy = bl.to_str(); - RGWRole role(g_ceph_context, store, role_name, path, trust_policy, uid); + RGWRole role(g_ceph_context, store, role_name, path, trust_policy, tenant); ret = role.create(true); if (ret < 0) { return -ret; @@ -4645,7 +4679,7 @@ int main(int argc, const char **argv) cerr << "ERROR: empty role name" << std::endl; return -EINVAL; } - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, tenant); ret = role.delete_obj(); if (ret < 0) { return -ret; @@ -4659,7 +4693,7 @@ int main(int argc, const char **argv) cerr << "ERROR: empty role name" << std::endl; return -EINVAL; } - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, tenant); ret = role.get(); if (ret < 0) { return -ret; @@ -4669,10 +4703,16 @@ int main(int argc, const char **argv) } case OPT_ROLE_MODIFY: { - if (role_name.empty() || assume_role_doc.empty()) { - cerr << "ERROR: one of role name or assume role policy document is empty" << std::endl; + if (role_name.empty()) { + cerr << "ERROR: role name is empty" << std::endl; return -EINVAL; } + + if (assume_role_doc.empty()) { + cerr << "ERROR: assume role policy document is empty" << std::endl; + return -EINVAL; + } + /* The following two calls will be replaced by read_decode_json or something similar when the code for AWS Policies is in place */ bufferlist bl; @@ -4687,7 +4727,7 @@ int main(int argc, const char **argv) return -EINVAL; } string trust_policy = bl.to_str(); - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, tenant); ret = role.get(); if (ret < 0) { return -ret; 
@@ -4703,7 +4743,7 @@ int main(int argc, const char **argv) case OPT_ROLE_LIST: { vector<RGWRole> result; - ret = RGWRole::get_roles_by_path_prefix(store, g_ceph_context, path_prefix, result); + ret = RGWRole::get_roles_by_path_prefix(store, g_ceph_context, path_prefix, tenant, result); if (ret < 0) { return -ret; } @@ -4712,10 +4752,21 @@ int main(int argc, const char **argv) } case OPT_ROLE_POLICY_PUT: { - if (role_name.empty() || policy_name.empty() || perm_policy_doc.empty()) { - cerr << "One of role name, policy name or permission policy document is empty" << std::endl; + if (role_name.empty()) { + cerr << "role name is empty" << std::endl; + return -EINVAL; + } + + if (policy_name.empty()) { + cerr << "policy name is empty" << std::endl; + return -EINVAL; + } + + if (perm_policy_doc.empty()) { + cerr << "permission policy document is empty" << std::endl; return -EINVAL; } + /* The following two calls will be replaced by read_decode_json or something similar, when code for AWS Policies is in place.*/ bufferlist bl; @@ -4732,7 +4783,7 @@ int main(int argc, const char **argv) string perm_policy; perm_policy = bl.c_str(); - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, tenant); ret = role.get(); if (ret < 0) { return -ret; @@ -4751,7 +4802,7 @@ int main(int argc, const char **argv) cerr << "ERROR: Role name is empty" << std::endl; return -EINVAL; } - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, tenant); ret = role.get(); if (ret < 0) { return -ret; @@ -4762,11 +4813,16 @@ int main(int argc, const char **argv) } case OPT_ROLE_POLICY_GET: { - if (role_name.empty() || policy_name.empty()) { - cerr << "ERROR: One of role name or policy name is empty" << std::endl; + if (role_name.empty()) { + cerr << "ERROR: role name is empty" << std::endl; return -EINVAL; } - RGWRole role(g_ceph_context, store, role_name); + + if (policy_name.empty()) { + cerr << "ERROR: policy 
name is empty" << std::endl; + return -EINVAL; + } + RGWRole role(g_ceph_context, store, role_name, tenant); int ret = role.get(); if (ret < 0) { return -ret; @@ -4781,11 +4837,16 @@ int main(int argc, const char **argv) } case OPT_ROLE_POLICY_DELETE: { - if (role_name.empty() || policy_name.empty()) { - cerr << "ERROR: One of role name or policy name is empty" << std::endl; + if (role_name.empty()) { + cerr << "ERROR: role name is empty" << std::endl; + return -EINVAL; + } + + if (policy_name.empty()) { + cerr << "ERROR: policy name is empty" << std::endl; return -EINVAL; } - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, tenant); ret = role.get(); if (ret < 0) { return -ret; @@ -6170,6 +6231,26 @@ next: formatter->flush(cout); } + if (opt_cmd == OPT_MDLOG_AUTOTRIM) { + // need a full history for purging old mdlog periods + store->meta_mgr->init_oldest_log_period(); + + RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry()); + RGWHTTPManager http(store->ctx(), crs.get_completion_mgr()); + int ret = http.set_threaded(); + if (ret < 0) { + cerr << "failed to initialize http client with " << cpp_strerror(ret) << std::endl; + return -ret; + } + + auto num_shards = g_conf->rgw_md_log_max_shards; + ret = crs.run(create_admin_meta_log_trim_cr(store, &http, num_shards)); + if (ret < 0) { + cerr << "automated mdlog trim failed with " << cpp_strerror(ret) << std::endl; + return -ret; + } + } + if (opt_cmd == OPT_MDLOG_TRIM) { utime_t start_time, end_time; @@ -6212,14 +6293,13 @@ next: return -ret; } - ret = sync.read_sync_status(); + rgw_meta_sync_status sync_status; + ret = sync.read_sync_status(&sync_status); if (ret < 0) { cerr << "ERROR: sync.read_sync_status() returned ret=" << ret << std::endl; return -ret; } - const rgw_meta_sync_status& sync_status = sync.get_sync_status(); - formatter->open_object_section("summary"); encode_json("sync_status", sync_status, formatter); @@ -6255,7 +6335,7 @@ next: } ret = 
sync.init_sync_status(); if (ret < 0) { - cerr << "ERROR: sync.get_sync_status() returned ret=" << ret << std::endl; + cerr << "ERROR: sync.init_sync_status() returned ret=" << ret << std::endl; return -ret; } } @@ -6336,7 +6416,7 @@ next: ret = sync.init_sync_status(); if (ret < 0) { - cerr << "ERROR: sync.get_sync_status() returned ret=" << ret << std::endl; + cerr << "ERROR: sync.init_sync_status() returned ret=" << ret << std::endl; return -ret; } } @@ -6384,7 +6464,7 @@ next: } ret = sync.init_sync_status(); if (ret < 0) { - cerr << "ERROR: sync.get_sync_status() returned ret=" << ret << std::endl; + cerr << "ERROR: sync.init_sync_status() returned ret=" << ret << std::endl; return -ret; } } diff --git a/src/rgw/rgw_asio_client.cc b/src/rgw/rgw_asio_client.cc index 219a36b6837..63de2d27e75 100644 --- a/src/rgw/rgw_asio_client.cc +++ b/src/rgw/rgw_asio_client.cc @@ -3,6 +3,7 @@ #include <boost/algorithm/string/predicate.hpp> #include <boost/asio/write.hpp> +#include <beast/http/read.hpp> #include "rgw_asio_client.h" @@ -12,22 +13,23 @@ #undef dout_prefix #define dout_prefix (*_dout << "asio: ") +using namespace rgw::asio; -RGWAsioClientIO::RGWAsioClientIO(tcp::socket&& socket, - request_type&& request) - : socket(std::move(socket)), - request(std::move(request)), - txbuf(*this) { +ClientIO::ClientIO(tcp::socket& socket, + parser_type& parser, + beast::flat_streambuf& buffer) + : socket(socket), parser(parser), buffer(buffer), txbuf(*this) +{ } -RGWAsioClientIO::~RGWAsioClientIO() = default; +ClientIO::~ClientIO() = default; -void RGWAsioClientIO::init_env(CephContext *cct) +void ClientIO::init_env(CephContext *cct) { env.init(cct); - body_iter = request.body.begin(); - const auto& headers = request.headers; + const auto& request = parser.get(); + const auto& headers = request.fields; for (auto header = headers.begin(); header != headers.end(); ++header) { const auto& name = header->name(); const auto& value = header->value(); @@ -80,42 +82,58 @@ void 
RGWAsioClientIO::init_env(CephContext *cct) // TODO: set REMOTE_USER if authenticated } -size_t RGWAsioClientIO::write_data(const char* const buf, - const size_t len) +size_t ClientIO::write_data(const char* buf, size_t len) { boost::system::error_code ec; auto bytes = boost::asio::write(socket, boost::asio::buffer(buf, len), ec); if (ec) { derr << "write_data failed: " << ec.message() << dendl; throw rgw::io::Exception(ec.value(), std::system_category()); - } else { - /* According to the documentation of boost::asio::write if there is - * no error (signalised by ec), then bytes == len. We don't need to - * take care of partial writes in such situation. */ - return bytes; } + /* According to the documentation of boost::asio::write if there is + * no error (signalised by ec), then bytes == len. We don't need to + * take care of partial writes in such situation. */ + return bytes; } -size_t RGWAsioClientIO::read_data(char* const buf, const size_t max) +size_t ClientIO::read_data(char* buf, size_t max) { - // read data from the body's bufferlist - auto bytes = std::min<unsigned>(max, body_iter.get_remaining()); - body_iter.copy(bytes, buf); - return bytes; + auto& message = parser.get(); + auto& body_remaining = message.body; + body_remaining = boost::asio::mutable_buffer{buf, max}; + + boost::system::error_code ec; + + dout(30) << this << " read_data for " << max << " with " + << buffer.size() << " bytes buffered" << dendl; + + while (boost::asio::buffer_size(body_remaining) && !parser.is_complete()) { + auto bytes = beast::http::read_some(socket, buffer, parser, ec); + buffer.consume(bytes); + if (ec == boost::asio::error::connection_reset || + ec == boost::asio::error::eof || + ec == beast::http::error::partial_message) { + break; + } + if (ec) { + derr << "failed to read body: " << ec.message() << dendl; + throw rgw::io::Exception(ec.value(), std::system_category()); + } + } + return max - boost::asio::buffer_size(body_remaining); } -size_t 
RGWAsioClientIO::complete_request() +size_t ClientIO::complete_request() { return 0; } -void RGWAsioClientIO::flush() +void ClientIO::flush() { txbuf.pubsync(); } -size_t RGWAsioClientIO::send_status(const int status, - const char* const status_name) +size_t ClientIO::send_status(int status, const char* status_name) { static constexpr size_t STATUS_BUF_SIZE = 128; @@ -126,7 +144,7 @@ size_t RGWAsioClientIO::send_status(const int status, return txbuf.sputn(statusbuf, statuslen); } -size_t RGWAsioClientIO::send_100_continue() +size_t ClientIO::send_100_continue() { const char HTTTP_100_CONTINUE[] = "HTTP/1.1 100 CONTINUE\r\n\r\n"; const size_t sent = txbuf.sputn(HTTTP_100_CONTINUE, @@ -148,7 +166,7 @@ static size_t dump_date_header(char (&timestr)[TIME_BUF_SIZE]) "Date: %a, %d %b %Y %H:%M:%S %Z\r\n", tmp); } -size_t RGWAsioClientIO::complete_header() +size_t ClientIO::complete_header() { size_t sent = 0; @@ -172,8 +190,8 @@ size_t RGWAsioClientIO::complete_header() return sent; } -size_t RGWAsioClientIO::send_header(const boost::string_ref& name, - const boost::string_ref& value) +size_t ClientIO::send_header(const boost::string_ref& name, + const boost::string_ref& value) { static constexpr char HEADER_SEP[] = ": "; static constexpr char HEADER_END[] = "\r\n"; @@ -188,7 +206,7 @@ size_t RGWAsioClientIO::send_header(const boost::string_ref& name, return sent; } -size_t RGWAsioClientIO::send_content_length(const uint64_t len) +size_t ClientIO::send_content_length(uint64_t len) { static constexpr size_t CONLEN_BUF_SIZE = 128; diff --git a/src/rgw/rgw_asio_client.h b/src/rgw/rgw_asio_client.h index c87fd5f720b..513a3ef0ca2 100644 --- a/src/rgw/rgw_asio_client.h +++ b/src/rgw/rgw_asio_client.h @@ -4,36 +4,58 @@ #define RGW_ASIO_CLIENT_H #include <boost/asio/ip/tcp.hpp> -#include <beast/http/body_type.hpp> -#include <beast/http/concepts.hpp> -#include <beast/http/message_v1.hpp> +#include <beast/http/message.hpp> +#include <beast/http/message_parser.hpp> +#include 
<beast/core/flat_streambuf.hpp> #include "include/assert.h" #include "rgw_client_io.h" -// bufferlist to represent the message body -class RGWBufferlistBody { - public: - using value_type = ceph::bufferlist; +namespace rgw { +namespace asio { - class reader; - class writer; +/// streaming message body interface +struct streaming_body { + using value_type = boost::asio::mutable_buffer; - template <bool isRequest, typename Headers> - using message_type = beast::http::message<isRequest, RGWBufferlistBody, - Headers>; -}; + class reader { + value_type& buffer; + public: + using mutable_buffers_type = boost::asio::mutable_buffers_1; -class RGWAsioClientIO : public rgw::io::RestfulClient, - public rgw::io::BuffererSink { - using tcp = boost::asio::ip::tcp; - tcp::socket socket; + static const bool is_direct{true}; // reads directly into user buffer - using body_type = RGWBufferlistBody; - using request_type = beast::http::request_v1<body_type>; - request_type request; + template<bool isRequest, class Fields> + explicit reader(beast::http::message<isRequest, streaming_body, Fields>& m) + : buffer(m.body) + {} + + void init() {} + void init(uint64_t content_length) {} + void finish() {} + + mutable_buffers_type prepare(size_t n) { + n = std::min(n, boost::asio::buffer_size(buffer)); + auto position = boost::asio::buffer_cast<char*>(buffer); + return {position, n}; + } - bufferlist::const_iterator body_iter; + void commit(size_t n) { + buffer = buffer + n; + } + }; +}; + +using header_type = beast::http::fields; +using parser_type = beast::http::message_parser<true, streaming_body, header_type>; + +class ClientIO : public io::RestfulClient, + public io::BuffererSink { + private: + using tcp = boost::asio::ip::tcp; + tcp::socket& socket; + parser_type& parser; + beast::flat_streambuf& buffer; //< parse buffer bool conn_keepalive{false}; bool conn_close{false}; @@ -45,8 +67,11 @@ class RGWAsioClientIO : public rgw::io::RestfulClient, size_t read_data(char *buf, size_t max); 
public: - RGWAsioClientIO(tcp::socket&& socket, request_type&& request); - ~RGWAsioClientIO() override; + ClientIO(tcp::socket& socket, parser_type& parser, + beast::flat_streambuf& buffer); + ~ClientIO() override; + + bool get_conn_close() const { return conn_close; } void init_env(CephContext *cct) override; size_t complete_request() override; @@ -71,45 +96,7 @@ class RGWAsioClientIO : public rgw::io::RestfulClient, } }; -// used by beast::http::read() to read the body into a bufferlist -class RGWBufferlistBody::reader { - value_type& bl; - public: - template<bool isRequest, typename Headers> - explicit reader(message_type<isRequest, Headers>& m) : bl(m.body) {} - - void write(const char* data, size_t size, boost::system::error_code&) { - bl.append(data, size); - } -}; - -// used by beast::http::write() to write the buffered body -class RGWBufferlistBody::writer { - const value_type& bl; - public: - template<bool isRequest, typename Headers> - explicit writer(const message_type<isRequest, Headers>& msg) - : bl(msg.body) {} - - void init(boost::system::error_code& ec) {} - uint64_t content_length() const { return bl.length(); } - - template<typename Write> - boost::tribool operator()(beast::http::resume_context&&, - boost::system::error_code&, Write&& write) { - // translate from bufferlist to a ConstBufferSequence for beast - std::vector<boost::asio::const_buffer> buffers; - buffers.reserve(bl.get_num_buffers()); - for (auto& ptr : bl.buffers()) { - buffers.emplace_back(ptr.c_str(), ptr.length()); - } - write(buffers); - return true; - } -}; -static_assert(beast::http::is_ReadableBody<RGWBufferlistBody>{}, - "RGWBufferlistBody does not satisfy ReadableBody"); -static_assert(beast::http::is_WritableBody<RGWBufferlistBody>{}, - "RGWBufferlistBody does not satisfy WritableBody"); +} // namespace asio +} // namespace rgw #endif // RGW_ASIO_CLIENT_H diff --git a/src/rgw/rgw_asio_frontend.cc b/src/rgw/rgw_asio_frontend.cc index ff2d7806759..5fc1deec42e 100644 --- 
a/src/rgw/rgw_asio_frontend.cc +++ b/src/rgw/rgw_asio_frontend.cc @@ -7,13 +7,11 @@ #include <vector> #include <boost/asio.hpp> -#include <boost/optional.hpp> +#include <boost/asio/spawn.hpp> #include <beast/core/placeholders.hpp> -#include <beast/core/streambuf.hpp> -#include <beast/http/empty_body.hpp> -#include <beast/http/parse_error.hpp> #include <beast/http/read.hpp> +#include <beast/http/string_body.hpp> #include <beast/http/write.hpp> #include "rgw_asio_frontend.h" @@ -71,28 +69,47 @@ void Pauser::wait() using tcp = boost::asio::ip::tcp; -class AsioConnection : public std::enable_shared_from_this<AsioConnection> { - RGWProcessEnv& env; - boost::asio::io_service::strand strand; - tcp::socket socket; - tcp::endpoint endpoint; - beast::streambuf buf; - beast::http::request_v1<RGWBufferlistBody> request; +// coroutine to handle a client connection to completion +static void handle_connection(RGWProcessEnv& env, tcp::socket socket, + boost::asio::yield_context yield) +{ + auto cct = env.store->ctx(); + boost::system::error_code ec; - public: - void on_read(boost::system::error_code ec) { - auto cct = env.store->ctx(); + beast::flat_streambuf buffer{1024}; + + // read messages from the socket until eof + for (;;) { + // parse the header + rgw::asio::parser_type parser; + do { + auto bytes = beast::http::async_read_some(socket, buffer, parser, yield[ec]); + buffer.consume(bytes); + } while (!ec && !parser.got_header()); + + if (ec == boost::asio::error::connection_reset || + ec == boost::asio::error::eof) { + return; + } if (ec) { - if (ec.category() == beast::http::get_parse_error_category()) { - ldout(cct, 1) << "parse failed: " << ec.message() << dendl; - } else { - ldout(cct, 1) << "read failed: " << ec.message() << dendl; - } - write_bad_request(); + auto& message = parser.get(); + ldout(cct, 1) << "read failed: " << ec.message() << dendl; + ldout(cct, 1) << "====== req done http_status=400 ======" << dendl; + beast::http::response<beast::http::string_body> 
response; + response.status = 400; + response.reason = "Bad Request"; + response.version = message.version == 10 ? 10 : 11; + beast::http::prepare(response); + beast::http::async_write(socket, std::move(response), yield[ec]); + // ignore ec return; } + + // process the request RGWRequest req{env.store->get_new_req_id()}; - RGWAsioClientIO real_client{std::move(socket), std::move(request)}; + + rgw::asio::ClientIO real_client{socket, parser, buffer}; + auto real_client_io = rgw::io::add_reordering( rgw::io::add_buffering( rgw::io::add_chunking( @@ -101,40 +118,12 @@ class AsioConnection : public std::enable_shared_from_this<AsioConnection> { RGWRestfulIO client(&real_client_io); process_request(env.store, env.rest, &req, env.uri_prefix, *env.auth_registry, &client, env.olog); - } - - void write_bad_request() { - beast::http::response_v1<beast::http::empty_body> response; - response.status = 400; - response.reason = "Bad Request"; - /* If the request is so terribly malformed that we can't extract even - * the protocol version, we will use HTTP/1.1 as a fallback. */ - response.version = request.version ? 
request.version : 11; - beast::http::prepare(response); - beast::http::async_write(socket, std::move(response), - std::bind(&AsioConnection::on_write, - shared_from_this(), - beast::asio::placeholders::error)); - } - void on_write(boost::system::error_code ec) { - auto cct = env.store->ctx(); - if (ec) { - ldout(cct, 1) << "write failed: " << ec.message() << dendl; + if (real_client.get_conn_close()) { + return; } } - - public: - AsioConnection(RGWProcessEnv& env, tcp::socket&& socket) - : env(env), strand(socket.get_io_service()), socket(std::move(socket)) - {} - - void read() { - beast::http::async_read(socket, buf, request, strand.wrap( - std::bind(&AsioConnection::on_read, shared_from_this(), - beast::asio::placeholders::error))); - } -}; +} class AsioFrontend { RGWProcessEnv env; @@ -168,9 +157,19 @@ int AsioFrontend::init() auto ep = tcp::endpoint{tcp::v4(), static_cast<unsigned short>(env.port)}; ldout(ctx(), 4) << "frontend listening on " << ep << dendl; - acceptor.open(ep.protocol()); + boost::system::error_code ec; + acceptor.open(ep.protocol(), ec); + if (ec) { + lderr(ctx()) << "failed to open socket: " << ec.message() << dendl; + return -ec.value(); + } acceptor.set_option(tcp::acceptor::reuse_address(true)); - acceptor.bind(ep); + acceptor.bind(ep, ec); + if (ec) { + lderr(ctx()) << "failed to bind address " << ep << + ": " << ec.message() << dendl; + return -ec.value(); + } acceptor.listen(boost::asio::socket_base::max_connections); acceptor.async_accept(peer_socket, [this] (boost::system::error_code ec) { @@ -189,13 +188,15 @@ void AsioFrontend::accept(boost::system::error_code ec) throw ec; } auto socket = std::move(peer_socket); - + // spawn a coroutine to handle the connection + boost::asio::spawn(service, + [&] (boost::asio::yield_context yield) { + handle_connection(env, std::move(socket), yield); + }); acceptor.async_accept(peer_socket, [this] (boost::system::error_code ec) { return accept(ec); }); - - std::make_shared<AsioConnection>(env, 
std::move(socket))->read(); } int AsioFrontend::run() diff --git a/src/rgw/rgw_auth.cc b/src/rgw/rgw_auth.cc index 6311d4b16ab..c63db0ccec5 100644 --- a/src/rgw/rgw_auth.cc +++ b/src/rgw/rgw_auth.cc @@ -56,6 +56,21 @@ transform_old_authinfo(const req_state* const s) return id == acct_id; } + bool is_identity(const idset_t& ids) const override { + for (auto& p : ids) { + if (p.is_wildcard()) { + return true; + } else if (p.is_tenant() && p.get_tenant() == id.tenant) { + return true; + } else if (p.is_user() && + (p.get_tenant() == id.tenant) && + (p.get_id() == id.id)) { + return true; + } + } + return false; + } + uint32_t get_perm_mask() const override { return perm_mask; } @@ -291,6 +306,29 @@ bool rgw::auth::RemoteApplier::is_owner_of(const rgw_user& uid) const return info.acct_user == uid; } +bool rgw::auth::RemoteApplier::is_identity(const idset_t& ids) const { + for (auto& id : ids) { + if (id.is_wildcard()) { + return true; + + // We also need to cover cases where rgw_keystone_implicit_tenants + // was enabled. */ + } else if (id.is_tenant() && + (info.acct_user.tenant.empty() ? + info.acct_user.id : + info.acct_user.tenant) == id.get_tenant()) { + return true; + } else if (id.is_user() && + info.acct_user.id == id.get_id() && + (info.acct_user.tenant.empty() ? 
+ info.acct_user.id : + info.acct_user.tenant) == id.get_tenant()) { + return true; + } + } + return false; +} + void rgw::auth::RemoteApplier::to_str(std::ostream& out) const { out << "rgw::auth::RemoteApplier(acct_user=" << info.acct_user @@ -383,8 +421,23 @@ bool rgw::auth::LocalApplier::is_owner_of(const rgw_user& uid) const return uid == user_info.user_id; } -void rgw::auth::LocalApplier::to_str(std::ostream& out) const -{ +bool rgw::auth::LocalApplier::is_identity(const idset_t& ids) const { + for (auto& id : ids) { + if (id.is_wildcard()) { + return true; + } else if (id.is_tenant() && + id.get_tenant() == user_info.user_id.tenant) { + return true; + } else if (id.is_user() && + (id.get_tenant() == user_info.user_id.tenant) && + (id.get_id() == user_info.user_id.id)) { + return true; + } + } + return false; +} + +void rgw::auth::LocalApplier::to_str(std::ostream& out) const { out << "rgw::auth::LocalApplier(acct_user=" << user_info.user_id << ", acct_name=" << user_info.display_name << ", subuser=" << subuser diff --git a/src/rgw/rgw_auth.h b/src/rgw/rgw_auth.h index 8118ac150d3..819a3380385 100644 --- a/src/rgw/rgw_auth.h +++ b/src/rgw/rgw_auth.h @@ -16,7 +16,6 @@ #define RGW_USER_ANON_ID "anonymous" - namespace rgw { namespace auth { @@ -28,6 +27,7 @@ using Exception = std::system_error; class Identity { public: typedef std::map<std::string, int> aclspec_t; + using idset_t = boost::container::flat_set<Principal>; virtual ~Identity() = default; @@ -64,6 +64,10 @@ public: } virtual void to_str(std::ostream& out) const = 0; + + /* Verify whether a given identity corresponds to an identity in the + provided set */ + virtual bool is_identity(const idset_t& ids) const = 0; }; inline std::ostream& operator<<(std::ostream& out, @@ -404,6 +408,8 @@ public: uint32_t get_perms_from_aclspec(const aclspec_t& aclspec) const override; bool is_admin_of(const rgw_user& uid) const override; bool is_owner_of(const rgw_user& uid) const override; + bool is_identity(const 
idset_t& ids) const override; + uint32_t get_perm_mask() const override { return info.perm_mask; } void to_str(std::ostream& out) const override; void load_acct_info(RGWUserInfo& user_info) const override; /* out */ @@ -449,6 +455,7 @@ public: uint32_t get_perms_from_aclspec(const aclspec_t& aclspec) const override; bool is_admin_of(const rgw_user& uid) const override; bool is_owner_of(const rgw_user& uid) const override; + bool is_identity(const idset_t& ids) const override; uint32_t get_perm_mask() const override { return get_perm_mask(subuser, user_info); } diff --git a/src/rgw/rgw_auth_filters.h b/src/rgw/rgw_auth_filters.h index 204a43e73f8..78846fc28d9 100644 --- a/src/rgw/rgw_auth_filters.h +++ b/src/rgw/rgw_auth_filters.h @@ -80,6 +80,11 @@ public: return get_decoratee().get_perm_mask(); } + bool is_identity( + const boost::container::flat_set<Principal>& ids) const override { + return get_decoratee().is_identity(ids); + } + void to_str(std::ostream& out) const override { get_decoratee().to_str(out); } diff --git a/src/rgw/rgw_basic_types.cc b/src/rgw/rgw_basic_types.cc index 5ebf1cfe446..c16d920f21b 100644 --- a/src/rgw/rgw_basic_types.cc +++ b/src/rgw/rgw_basic_types.cc @@ -1,6 +1,13 @@ +#include <iostream> +#include <sstream> +#include <string> + #include "rgw_basic_types.h" #include "common/ceph_json.h" +using std::string; +using std::stringstream; + void decode_json_obj(rgw_user& val, JSONObj *obj) { string s = obj->get_data(); @@ -12,3 +19,24 @@ void encode_json(const char *name, const rgw_user& val, Formatter *f) string s = val.to_str(); f->dump_string(name, s); } + +namespace rgw { +namespace auth { +ostream& operator <<(ostream& m, const Principal& p) { + if (p.is_wildcard()) { + return m << "*"; + } + + m << "arn:aws:iam:" << p.get_tenant() << ":"; + if (p.is_tenant()) { + return m << "root"; + } + return m << (p.is_user() ? 
"user/" : "role/") << p.get_id(); +} +string to_string(const Principal& p) { + stringstream s; + s << p; + return s.str(); +} +} +} diff --git a/src/rgw/rgw_basic_types.h b/src/rgw/rgw_basic_types.h index 261b201369a..31e9d3a32ac 100644 --- a/src/rgw/rgw_basic_types.h +++ b/src/rgw/rgw_basic_types.h @@ -1,3 +1,5 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab #ifndef CEPH_RGW_BASIC_TYPES_H #define CEPH_RGW_BASIC_TYPES_H @@ -18,6 +20,10 @@ struct rgw_user { : tenant(tenant), id(id) { } + rgw_user(std::string&& tenant, std::string&& id) + : tenant(std::move(tenant)), + id(std::move(id)) { + } void encode(bufferlist& bl) const { ENCODE_START(1, 1, bl); @@ -100,6 +106,82 @@ struct rgw_user { }; WRITE_CLASS_ENCODER(rgw_user) +// Represents an identity. This is more wide-ranging than a +// 'User'. Its purpose is to be matched against by an +// IdentityApplier. The internal representation will doubtless change as +// more types are added. We may want to expose the type enum and make +// the member public so people can switch/case on it. 
+ +namespace rgw { +namespace auth { +class Principal { + enum types { User, Role, Tenant, Wildcard }; + types t; + rgw_user u; + + Principal(types t) + : t(t) {} + + Principal(types t, std::string&& n, std::string i) + : t(t), u(std::move(n), std::move(i)) {} + +public: + + static Principal wildcard() { + return Principal(Wildcard); + } + + static Principal user(std::string&& t, std::string&& u) { + return Principal(User, std::move(t), std::move(u)); + } + + static Principal role(std::string&& t, std::string&& u) { + return Principal(Role, std::move(t), std::move(u)); + } + + static Principal tenant(std::string&& t) { + return Principal(Tenant, std::move(t), {}); + } + + bool is_wildcard() const { + return t == Wildcard; + } + + bool is_user() const { + return t == User; + } + + bool is_role() const { + return t == Role; + } + + bool is_tenant() const { + return t == Tenant; + } + + const std::string& get_tenant() const { + ceph_assert(t != Wildcard); + return u.tenant; + } + + const std::string& get_id() const { + ceph_assert(t != Wildcard && t != Tenant); + return u.id; + } + + bool operator ==(const Principal& o) const { + return (t == o.t) && (u == o.u); + } + + bool operator <(const Principal& o) const { + return (t < o.t) || ((t == o.t) && (u < o.u)); + } +}; + +std::ostream& operator <<(std::ostream& m, const Principal& p); +std::string to_string(const Principal& p); +} +} class JSONObj; diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 049ab4a294b..7aeee21ad25 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -51,18 +51,9 @@ void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id) * acceptable in bucket names and thus qualified buckets cannot conflict * with the legacy or S3 buckets. 
*/ -void rgw_make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry) { - if (bucket_name.empty()) { - bucket_entry.clear(); - } else if (tenant_name.empty()) { - bucket_entry = bucket_name; - } else { - bucket_entry = tenant_name + "/" + bucket_name; - } -} - -string rgw_make_bucket_entry_name(const string& tenant_name, const string& bucket_name) { - string bucket_entry; +std::string rgw_make_bucket_entry_name(const std::string& tenant_name, + const std::string& bucket_name) { + std::string bucket_entry; if (bucket_name.empty()) { bucket_entry.clear(); @@ -1959,11 +1950,11 @@ int RGWDataChangesLog::trim_entries(const real_time& start_time, const real_time bool RGWDataChangesLog::going_down() { - return (down_flag.read() != 0); + return down_flag; } RGWDataChangesLog::~RGWDataChangesLog() { - down_flag.set(1); + down_flag = true; renew_thread->stop(); renew_thread->join(); delete renew_thread; @@ -2243,7 +2234,7 @@ public: bci.info.bucket.name = bucket_name; bci.info.bucket.bucket_id = bucket_instance; bci.info.bucket.tenant = tenant_name; - ret = store->select_bucket_location_by_rule(bci.info.placement_rule, bci.info.bucket, &rule_info); + ret = store->select_bucket_location_by_rule(bci.info.placement_rule, &rule_info); if (ret < 0) { ldout(store->ctx(), 0) << "ERROR: select_bucket_placement() returned " << ret << dendl; return ret; diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h index e691a1178a6..14c472faa8c 100644 --- a/src/rgw/rgw_bucket.h +++ b/src/rgw/rgw_bucket.h @@ -20,9 +20,6 @@ #include "common/ceph_time.h" #include "rgw_formats.h" - -using namespace std; - // define as static when RGWBucket implementation compete extern void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id); @@ -49,11 +46,14 @@ extern int rgw_bucket_delete_bucket_obj(RGWRados *store, extern int rgw_bucket_sync_user_stats(RGWRados *store, const rgw_user& user_id, const RGWBucketInfo& bucket_info); extern int 
rgw_bucket_sync_user_stats(RGWRados *store, const string& tenant_name, const string& bucket_name); -extern void rgw_make_bucket_entry_name(const string& tenant_name, - const string& bucket_name, - string& bucket_entry); -extern string rgw_make_bucket_entry_name(const string& tenant_name, - const string& bucket_name); +extern std::string rgw_make_bucket_entry_name(const std::string& tenant_name, + const std::string& bucket_name); +static inline void rgw_make_bucket_entry_name(const string& tenant_name, + const string& bucket_name, + std::string& bucket_entry) { + bucket_entry = rgw_make_bucket_entry_name(tenant_name, bucket_name); +} + extern void rgw_parse_url_bucket(const string& bucket, const string& auth_tenant, string &tenant_name, string &bucket_name); @@ -401,7 +401,7 @@ class RGWDataChangesLog { RWLock modified_lock; map<int, set<string> > modified_shards; - atomic_t down_flag; + std::atomic<bool> down_flag = { false }; struct ChangeStatus { real_time cur_expiration; diff --git a/src/rgw/rgw_cache.h b/src/rgw/rgw_cache.h index 32068d39073..6bc9ef14903 100644 --- a/src/rgw/rgw_cache.h +++ b/src/rgw/rgw_cache.h @@ -231,7 +231,8 @@ public: bufferlist& data, RGWObjVersionTracker *objv_tracker, real_time set_mtime) override; - int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, off_t ofs, bool exclusive) override; + int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, off_t ofs, bool exclusive, + RGWObjVersionTracker *objv_tracker = nullptr) override; int get_system_obj(RGWObjectCtx& obj_ctx, RGWRados::SystemObject::Read::GetObjState& read_state, RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj, @@ -422,7 +423,8 @@ int RGWCache<T>::put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, real_time } template <class T> -int RGWCache<T>::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& data, off_t ofs, bool exclusive) +int RGWCache<T>::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& data, off_t ofs, bool 
exclusive, + RGWObjVersionTracker *objv_tracker) { rgw_pool pool; string oid; @@ -436,7 +438,11 @@ int RGWCache<T>::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& da info.status = 0; info.flags = CACHE_FLAG_DATA; } - int ret = T::put_system_obj_data(ctx, obj, data, ofs, exclusive); + if (objv_tracker) { + info.version = objv_tracker->write_version; + info.flags |= CACHE_FLAG_OBJV; + } + int ret = T::put_system_obj_data(ctx, obj, data, ofs, exclusive, objv_tracker); if (cacheable) { string name = normal_name(pool, oid); if (ret >= 0) { diff --git a/src/rgw/rgw_civetweb_frontend.cc b/src/rgw/rgw_civetweb_frontend.cc index c564d0e4920..a18a6c5da41 100644 --- a/src/rgw/rgw_civetweb_frontend.cc +++ b/src/rgw/rgw_civetweb_frontend.cc @@ -66,32 +66,30 @@ int RGWCivetWebFrontend::run() /* Prepare options for CivetWeb. */ const std::set<boost::string_ref> rgw_opts = { "port", "prefix" }; - const size_t CW_NUM_OPTS = 2 * (conf_map.size() - rgw_opts.size()) + 1; - const char *options[CW_NUM_OPTS]; - size_t i = 0; + + std::vector<const char*> options; for (const auto& pair : conf_map) { if (! rgw_opts.count(pair.first)) { /* CivetWeb doesn't understand configurables of the glue layer between * it and RadosGW. We need to strip them out. Otherwise CivetWeb would * signalise an error. */ - options[i + 0] = pair.first.c_str(); - options[i + 1] = pair.second.c_str(); + options.push_back(pair.first.c_str()); + options.push_back(pair.second.c_str()); - dout(20) << "civetweb config: " << options[i] << ": " - << (options[i + 1] ? options[i + 1] : "<null>") << dendl; - i += 2; + dout(20) << "civetweb config: " << pair.first + << ": " << pair.second << dendl; } } - options[i] = nullptr; + options.push_back(nullptr); /* Initialize the CivetWeb right now. 
*/ struct mg_callbacks cb; memset((void *)&cb, 0, sizeof(cb)); cb.begin_request = civetweb_callback; cb.log_message = rgw_civetweb_log_callback; cb.log_access = rgw_civetweb_log_access_callback; - ctx = mg_start(&cb, this, (const char **)&options); + ctx = mg_start(&cb, this, options.data()); return ! ctx ? -EIO : 0; } /* RGWCivetWebFrontend::run */ diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index fbff872ee21..009918b76ff 100644 --- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -11,10 +11,12 @@ #include "json_spirit/json_spirit.h" #include "common/ceph_json.h" +#include "rgw_op.h" #include "rgw_common.h" #include "rgw_acl.h" #include "rgw_string.h" #include "rgw_rados.h" +#include "rgw_http_errors.h" #include "common/ceph_crypto.h" #include "common/armor.h" @@ -32,15 +34,87 @@ #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw -#define POLICY_ACTION 0x01 -#define POLICY_RESOURCE 0x02 -#define POLICY_ARN 0x04 -#define POLICY_STRING 0x08 +using boost::none; +using boost::optional; + +using rgw::IAM::ARN; +using rgw::IAM::Effect; +using rgw::IAM::op_to_perm; +using rgw::IAM::Policy; PerfCounters *perfcounter = NULL; const uint32_t RGWBucketInfo::NUM_SHARDS_BLIND_BUCKET(UINT32_MAX); +rgw_http_errors rgw_http_s3_errors({ + { 0, {200, "" }}, + { STATUS_CREATED, {201, "Created" }}, + { STATUS_ACCEPTED, {202, "Accepted" }}, + { STATUS_NO_CONTENT, {204, "NoContent" }}, + { STATUS_PARTIAL_CONTENT, {206, "" }}, + { ERR_PERMANENT_REDIRECT, {301, "PermanentRedirect" }}, + { ERR_WEBSITE_REDIRECT, {301, "WebsiteRedirect" }}, + { STATUS_REDIRECT, {303, "" }}, + { ERR_NOT_MODIFIED, {304, "NotModified" }}, + { EINVAL, {400, "InvalidArgument" }}, + { ERR_INVALID_REQUEST, {400, "InvalidRequest" }}, + { ERR_INVALID_DIGEST, {400, "InvalidDigest" }}, + { ERR_BAD_DIGEST, {400, "BadDigest" }}, + { ERR_INVALID_BUCKET_NAME, {400, "InvalidBucketName" }}, + { ERR_INVALID_OBJECT_NAME, {400, "InvalidObjectName" }}, + { ERR_UNRESOLVABLE_EMAIL, {400, 
"UnresolvableGrantByEmailAddress" }}, + { ERR_INVALID_PART, {400, "InvalidPart" }}, + { ERR_INVALID_PART_ORDER, {400, "InvalidPartOrder" }}, + { ERR_REQUEST_TIMEOUT, {400, "RequestTimeout" }}, + { ERR_TOO_LARGE, {400, "EntityTooLarge" }}, + { ERR_TOO_SMALL, {400, "EntityTooSmall" }}, + { ERR_TOO_MANY_BUCKETS, {400, "TooManyBuckets" }}, + { ERR_MALFORMED_XML, {400, "MalformedXML" }}, + { ERR_AMZ_CONTENT_SHA256_MISMATCH, {400, "XAmzContentSHA256Mismatch" }}, + { ERR_LENGTH_REQUIRED, {411, "MissingContentLength" }}, + { EACCES, {403, "AccessDenied" }}, + { EPERM, {403, "AccessDenied" }}, + { ERR_SIGNATURE_NO_MATCH, {403, "SignatureDoesNotMatch" }}, + { ERR_INVALID_ACCESS_KEY, {403, "InvalidAccessKeyId" }}, + { ERR_USER_SUSPENDED, {403, "UserSuspended" }}, + { ERR_REQUEST_TIME_SKEWED, {403, "RequestTimeTooSkewed" }}, + { ERR_QUOTA_EXCEEDED, {403, "QuotaExceeded" }}, + { ENOENT, {404, "NoSuchKey" }}, + { ERR_NO_SUCH_BUCKET, {404, "NoSuchBucket" }}, + { ERR_NO_SUCH_WEBSITE_CONFIGURATION, {404, "NoSuchWebsiteConfiguration" }}, + { ERR_NO_SUCH_UPLOAD, {404, "NoSuchUpload" }}, + { ERR_NOT_FOUND, {404, "Not Found"}}, + { ERR_NO_SUCH_LC, {404, "NoSuchLifecycleConfiguration"}}, + { ERR_METHOD_NOT_ALLOWED, {405, "MethodNotAllowed" }}, + { ETIMEDOUT, {408, "RequestTimeout" }}, + { EEXIST, {409, "BucketAlreadyExists" }}, + { ERR_USER_EXIST, {409, "UserAlreadyExists" }}, + { ERR_EMAIL_EXIST, {409, "EmailExists" }}, + { ERR_KEY_EXIST, {409, "KeyExists"}}, + { ERR_INVALID_SECRET_KEY, {400, "InvalidSecretKey"}}, + { ERR_INVALID_KEY_TYPE, {400, "InvalidKeyType"}}, + { ERR_INVALID_CAP, {400, "InvalidCapability"}}, + { ERR_INVALID_TENANT_NAME, {400, "InvalidTenantName" }}, + { ENOTEMPTY, {409, "BucketNotEmpty" }}, + { ERR_PRECONDITION_FAILED, {412, "PreconditionFailed" }}, + { ERANGE, {416, "InvalidRange" }}, + { ERR_UNPROCESSABLE_ENTITY, {422, "UnprocessableEntity" }}, + { ERR_LOCKED, {423, "Locked" }}, + { ERR_INTERNAL_ERROR, {500, "InternalError" }}, + { ERR_NOT_IMPLEMENTED, {501, 
"NotImplemented" }}, + { ERR_SERVICE_UNAVAILABLE, {503, "ServiceUnavailable"}}, +}); + +rgw_http_errors rgw_http_swift_errors({ + { EACCES, {403, "AccessDenied" }}, + { EPERM, {401, "AccessDenied" }}, + { ERR_USER_SUSPENDED, {401, "UserSuspended" }}, + { ERR_INVALID_UTF8, {412, "Invalid UTF8" }}, + { ERR_BAD_URL, {412, "Bad URL" }}, + { ERR_NOT_SLO_MANIFEST, {400, "Not an SLO manifest" }}, + { ERR_QUOTA_EXCEEDED, {413, "QuotaExceeded" }}, +}); + int rgw_perf_start(CephContext *cct) { PerfCountersBuilder plb(cct, cct->_conf->name.to_str(), l_rgw_first, l_rgw_last); @@ -84,12 +158,6 @@ rgw_err() clear(); } -rgw_err:: -rgw_err(int http, const std::string& s3) - : http_ret(http), ret(0), s3_code(s3) -{ -} - void rgw_err:: clear() { @@ -224,6 +292,77 @@ req_state::~req_state() { delete object_acl; } +bool search_err(rgw_http_errors& errs, int err_no, bool is_website_redirect, int& http_ret, string& code) +{ + auto r = errs.find(err_no); + if (r != errs.end()) { + if (! is_website_redirect) + http_ret = r->second.first; + code = r->second.second; + return true; + } + return false; +} + +void set_req_state_err(struct rgw_err& err, /* out */ + int err_no, /* in */ + const int prot_flags) /* in */ +{ + if (err_no < 0) + err_no = -err_no; + + err.ret = -err_no; + bool is_website_redirect = false; + + if (prot_flags & RGW_REST_SWIFT) { + if (search_err(rgw_http_swift_errors, err_no, is_website_redirect, err.http_ret, err.s3_code)) + return; + } + + //Default to searching in s3 errors + is_website_redirect |= (prot_flags & RGW_REST_WEBSITE) + && err_no == ERR_WEBSITE_REDIRECT && err.is_clear(); + if (search_err(rgw_http_s3_errors, err_no, is_website_redirect, err.http_ret, err.s3_code)) + return; + dout(0) << "WARNING: set_req_state_err err_no=" << err_no + << " resorting to 500" << dendl; + + err.http_ret = 500; + err.s3_code = "UnknownError"; +} + +void set_req_state_err(struct req_state* s, int err_no, const string& err_msg) +{ + if (s) { + set_req_state_err(s, err_no); + 
s->err.message = err_msg; + } +} + +void set_req_state_err(struct req_state* s, int err_no) +{ + if (s) { + set_req_state_err(s->err, err_no, s->prot_flags); + } +} + +void dump(struct req_state* s) +{ + if (s->format != RGW_FORMAT_HTML) + s->formatter->open_object_section("Error"); + if (!s->err.s3_code.empty()) + s->formatter->dump_string("Code", s->err.s3_code); + if (!s->err.message.empty()) + s->formatter->dump_string("Message", s->err.message); + if (!s->bucket_name.empty()) // TODO: connect to expose_bucket + s->formatter->dump_string("BucketName", s->bucket_name); + if (!s->trans_id.empty()) // TODO: connect to expose_bucket or another toggle + s->formatter->dump_string("RequestId", s->trans_id); + s->formatter->dump_string("HostId", s->host_id); + if (s->format != RGW_FORMAT_HTML) + s->formatter->close_section(); +} + struct str_len { const char *str; int len; @@ -938,17 +1077,39 @@ bool verify_requester_payer_permission(struct req_state *s) } bool verify_bucket_permission(struct req_state * const s, + const rgw_bucket& bucket, RGWAccessControlPolicy * const user_acl, RGWAccessControlPolicy * const bucket_acl, - const int perm) + const optional<Policy>& bucket_policy, + const uint64_t op) { - if (!bucket_acl) + if (!verify_requester_payer_permission(s)) return false; - if ((perm & (int)s->perm_mask) != perm) + if (bucket_policy) { + auto r = bucket_policy->eval(s->env, *s->auth.identity, op, ARN(bucket)); + if (r == Effect::Allow) + // It looks like S3 ACLs only GRANT permissions rather than + // denying them, so this should be safe. 
+ return true; + else if (r == Effect::Deny) + return false; + } + + const auto perm = op_to_perm(op); + + return verify_bucket_permission_no_policy(s, user_acl, bucket_acl, perm); +} + +bool verify_bucket_permission_no_policy(struct req_state * const s, + RGWAccessControlPolicy * const user_acl, + RGWAccessControlPolicy * const bucket_acl, + const int perm) +{ + if (!bucket_acl) return false; - if (!verify_requester_payer_permission(s)) + if ((perm & (int)s->perm_mask) != perm) return false; if (bucket_acl->verify_permission(*s->auth.identity, perm, perm, @@ -961,35 +1122,76 @@ bool verify_bucket_permission(struct req_state * const s, return user_acl->verify_permission(*s->auth.identity, perm, perm); } -bool verify_bucket_permission(struct req_state * const s, const int perm) +bool verify_bucket_permission_no_policy(struct req_state * const s, const int perm) +{ + if (!verify_requester_payer_permission(s)) + return false; + + return verify_bucket_permission_no_policy(s, + s->user_acl.get(), + s->bucket_acl, + perm); +} + +bool verify_bucket_permission(struct req_state * const s, const uint64_t op) { return verify_bucket_permission(s, + s->bucket, s->user_acl.get(), s->bucket_acl, - perm); + s->iam_policy, + op); +} + +static inline bool check_deferred_bucket_perms(struct req_state * const s, + const rgw_bucket& bucket, + RGWAccessControlPolicy * const user_acl, + RGWAccessControlPolicy * const bucket_acl, + const optional<Policy>& bucket_policy, + const uint8_t deferred_check, + const uint64_t op) +{ + return (s->defer_to_bucket_acls == deferred_check \ + && verify_bucket_permission(s, bucket, user_acl, bucket_acl, bucket_policy, op)); } -static inline bool check_deferred_bucket_acl(struct req_state * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, - const uint8_t deferred_check, - const int perm) +static inline bool check_deferred_bucket_only_acl(struct req_state * const s, + RGWAccessControlPolicy * const 
user_acl, + RGWAccessControlPolicy * const bucket_acl, + const uint8_t deferred_check, + const int perm) { return (s->defer_to_bucket_acls == deferred_check \ - && verify_bucket_permission(s, user_acl, bucket_acl, perm)); + && verify_bucket_permission_no_policy(s, user_acl, bucket_acl, perm)); } bool verify_object_permission(struct req_state * const s, + const rgw_obj& obj, RGWAccessControlPolicy * const user_acl, RGWAccessControlPolicy * const bucket_acl, RGWAccessControlPolicy * const object_acl, - const int perm) + const optional<Policy>& bucket_policy, + const uint64_t op) { if (!verify_requester_payer_permission(s)) return false; - if (check_deferred_bucket_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, perm) || - check_deferred_bucket_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, RGW_PERM_FULL_CONTROL)) { + if (bucket_policy) { + auto r = bucket_policy->eval(s->env, *s->auth.identity, op, ARN(obj)); + if (r == Effect::Allow) + // It looks like S3 ACLs only GRANT permissions rather than + // denying them, so this should be safe. 
+ return true; + else if (r == Effect::Deny) + return false; + } + + const auto perm = op_to_perm(op); + + if (check_deferred_bucket_perms(s, obj.bucket, user_acl, bucket_acl, bucket_policy, + RGW_DEFER_TO_BUCKET_ACLS_RECURSE, op) || + check_deferred_bucket_perms(s, obj.bucket, user_acl, bucket_acl, bucket_policy, + RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, rgw::IAM::s3All)) { return true; } @@ -1029,13 +1231,72 @@ bool verify_object_permission(struct req_state * const s, return user_acl->verify_permission(*s->auth.identity, swift_perm, swift_perm); } -bool verify_object_permission(struct req_state *s, int perm) +bool verify_object_permission_no_policy(struct req_state * const s, + RGWAccessControlPolicy * const user_acl, + RGWAccessControlPolicy * const bucket_acl, + RGWAccessControlPolicy * const object_acl, + const int perm) +{ + if (check_deferred_bucket_only_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, perm) || + check_deferred_bucket_only_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, RGW_PERM_FULL_CONTROL)) { + return true; + } + + if (!object_acl) { + return false; + } + + bool ret = object_acl->verify_permission(*s->auth.identity, s->perm_mask, perm); + if (ret) { + return true; + } + + if (!s->cct->_conf->rgw_enforce_swift_acls) + return ret; + + if ((perm & (int)s->perm_mask) != perm) + return false; + + int swift_perm = 0; + if (perm & (RGW_PERM_READ | RGW_PERM_READ_ACP)) + swift_perm |= RGW_PERM_READ_OBJS; + if (perm & RGW_PERM_WRITE) + swift_perm |= RGW_PERM_WRITE_OBJS; + + if (!swift_perm) + return false; + + /* we already verified the user mask above, so we pass swift_perm as the mask here, + otherwise the mask might not cover the swift permissions bits */ + if (bucket_acl->verify_permission(*s->auth.identity, swift_perm, swift_perm, + s->info.env->get("HTTP_REFERER"))) + return true; + + if (!user_acl) + return false; + + return user_acl->verify_permission(*s->auth.identity, swift_perm, swift_perm); +} + +bool 
verify_object_permission_no_policy(struct req_state *s, int perm) +{ + if (!verify_requester_payer_permission(s)) + return false; + + return verify_object_permission_no_policy(s, s->user_acl.get(), + s->bucket_acl, s->object_acl, + perm); +} + +bool verify_object_permission(struct req_state *s, uint64_t op) { return verify_object_permission(s, - s->user_acl.get(), + rgw_obj(s->bucket, s->object), + s->user_acl.get(), s->bucket_acl, s->object_acl, - perm); + s->iam_policy, + op); } class HexTable @@ -1613,7 +1874,7 @@ static int matchignorecase(const char& c1, const char& c2) return 0; } -int match(const string& pattern, const string& input, int flag) +int match(const string& pattern, const string& input, uint32_t flag) { auto last_pos_input = 0, last_pos_pattern = 0; @@ -1625,7 +1886,9 @@ int match(const string& pattern, const string& input, int flag) string substr_pattern = pattern.substr(last_pos_pattern, cur_pos_pattern); int res; - if (flag & POLICY_ACTION || flag & POLICY_ARN) { + if (substr_pattern == "*") { + res = 1; + } else if (flag & MATCH_POLICY_ACTION || flag & MATCH_POLICY_ARN) { res = match_internal(substr_pattern, substr_input, &matchignorecase); } else { res = match_internal(substr_pattern, substr_input, &matchcase); @@ -1636,7 +1899,7 @@ int match(const string& pattern, const string& input, int flag) if (cur_pos_pattern == string::npos && cur_pos_input == string::npos) return 1; else if ((cur_pos_pattern == string::npos && cur_pos_input != string::npos) || - (cur_pos_pattern != string::npos && cur_pos_input == string::npos)) + (cur_pos_pattern != string::npos && cur_pos_input == string::npos)) return 0; last_pos_pattern = cur_pos_pattern + 1; diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 5ad7637b44c..6e24bb1d419 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -21,6 +21,7 @@ #include "acconfig.h" #include "rgw_acl.h" #include "rgw_cors.h" +#include "rgw_iam_policy.h" #include "rgw_quota.h" #include "rgw_string.h" 
#include "rgw_website.h" @@ -29,8 +30,6 @@ #include "cls/rgw/cls_rgw_types.h" #include "include/rados/librados.hpp" -using namespace std; - namespace ceph { class Formatter; } @@ -99,6 +98,10 @@ using ceph::crypto::MD5; #define RGW_ATTR_COMPRESSION RGW_ATTR_PREFIX "compression" +/* IAM Policy */ +#define RGW_ATTR_IAM_POLICY RGW_ATTR_PREFIX "iam-policy" + + /* RGW File Attributes */ #define RGW_ATTR_UNIX_KEY1 RGW_ATTR_PREFIX "unix-key1" #define RGW_ATTR_UNIX1 RGW_ATTR_PREFIX "unix1" @@ -202,6 +205,8 @@ using ceph::crypto::MD5; #define UINT32_MAX (0xffffffffu) #endif +struct req_state; + typedef void *RGWAccessHandle; @@ -263,7 +268,6 @@ enum RGWObjCategory { /** Store error returns for output at a different point in the program */ struct rgw_err { rgw_err(); - rgw_err(int http, const std::string &s3); void clear(); bool is_clear() const; bool is_err() const; @@ -275,6 +279,8 @@ struct rgw_err { std::string message; }; + + /* Helper class used for RGWHTTPArgs parsing */ class NameVal { @@ -451,6 +457,9 @@ enum RGWOpType { RGW_OP_GET_ROLE_POLICY, RGW_OP_LIST_ROLE_POLICIES, RGW_OP_DELETE_ROLE_POLICY, + RGW_OP_PUT_BUCKET_POLICY, + RGW_OP_GET_BUCKET_POLICY, + RGW_OP_DELETE_BUCKET_POLICY, /* rgw specific */ RGW_OP_ADMIN_SET_METADATA, @@ -1338,8 +1347,6 @@ struct RGWStorageStats void dump(Formatter *f) const; }; -struct req_state; - class RGWEnv; /* Namespaced forward declarations. 
*/ @@ -1348,12 +1355,14 @@ namespace rgw { namespace s3 { class RGWGetPolicyV2Extractor; } + class Completer; } namespace io { class BasicClient; } } + struct req_info { RGWEnv *env; RGWHTTPArgs args; @@ -1701,7 +1710,7 @@ struct req_state { const char *length; int64_t content_length; map<string, string> generic_attrs; - struct rgw_err err; + rgw_err err; bool expect_cont; bool header_ended; uint64_t obj_size; @@ -1770,6 +1779,9 @@ struct req_state { RGWAccessControlPolicy *bucket_acl; RGWAccessControlPolicy *object_acl; + rgw::IAM::Environment env; + boost::optional<rgw::IAM::Policy> iam_policy; + /* Is the request made by an user marked as a system one? * Being system user means we also have the admin status. */ bool system_request; @@ -1805,8 +1817,15 @@ struct req_state { req_state(CephContext* _cct, RGWEnv* e, RGWUserInfo* u); ~req_state(); + + bool is_err() const { return err.is_err(); } }; +void set_req_state_err(struct req_state*, int); +void set_req_state_err(struct req_state*, int, const string&); +void set_req_state_err(struct rgw_err&, int, const int); +void dump(struct req_state*); + /** Store basic data on bucket */ struct RGWBucketEnt { rgw_bucket bucket; @@ -2132,17 +2151,38 @@ bool verify_user_permission(struct req_state * const s, const int perm); bool verify_user_permission(struct req_state * const s, const int perm); -extern bool verify_bucket_permission(struct req_state * s, - RGWAccessControlPolicy * user_acl, - RGWAccessControlPolicy * bucket_acl, - int perm); -extern bool verify_bucket_permission(struct req_state *s, int perm); -extern bool verify_object_permission(struct req_state *s, - RGWAccessControlPolicy * user_acl, - RGWAccessControlPolicy * bucket_acl, - RGWAccessControlPolicy * object_acl, - int perm); -extern bool verify_object_permission(struct req_state *s, int perm); +bool verify_bucket_permission( + struct req_state * const s, + const rgw_bucket& bucket, + RGWAccessControlPolicy * const user_acl, + RGWAccessControlPolicy * 
const bucket_acl, + const boost::optional<rgw::IAM::Policy>& bucket_policy, + const uint64_t op); +bool verify_bucket_permission(struct req_state * const s, const uint64_t op); +bool verify_bucket_permission_no_policy( + struct req_state * const s, + RGWAccessControlPolicy * const user_acl, + RGWAccessControlPolicy * const bucket_acl, + const int perm); +bool verify_bucket_permission_no_policy(struct req_state * const s, + const int perm); +extern bool verify_object_permission( + struct req_state * const s, + const rgw_obj& obj, + RGWAccessControlPolicy * const user_acl, + RGWAccessControlPolicy * const bucket_acl, + RGWAccessControlPolicy * const object_acl, + const boost::optional<rgw::IAM::Policy>& bucket_policy, + const uint64_t op); +extern bool verify_object_permission(struct req_state *s, uint64_t op); +extern bool verify_object_permission_no_policy( + struct req_state * const s, + RGWAccessControlPolicy * const user_acl, + RGWAccessControlPolicy * const bucket_acl, + RGWAccessControlPolicy * const object_acl, + int perm); +extern bool verify_object_permission_no_policy(struct req_state *s, + int perm); /** Convert an input URL into a sane object name * by converting %-escaped strings into characters, etc*/ extern void rgw_uri_escape_char(char c, string& dst); @@ -2168,5 +2208,12 @@ extern string calc_hash_sha256_close_stream(SHA256 **hash); extern int rgw_parse_op_type_list(const string& str, uint32_t *perm); -int match(const string& pattern, const string& input, int flag); +namespace { + constexpr uint32_t MATCH_POLICY_ACTION = 0x01; + constexpr uint32_t MATCH_POLICY_RESOURCE = 0x02; + constexpr uint32_t MATCH_POLICY_ARN = 0x04; + constexpr uint32_t MATCH_POLICY_STRING = 0x08; +} + +int match(const std::string& pattern, const std::string& input, uint32_t flag); #endif diff --git a/src/rgw/rgw_coroutine.cc b/src/rgw/rgw_coroutine.cc index 85ad290d020..4bb48da49dc 100644 --- a/src/rgw/rgw_coroutine.cc +++ b/src/rgw/rgw_coroutine.cc @@ -1,10 +1,11 @@ - 
#include "common/ceph_json.h" #include "rgw_coroutine.h" #include "rgw_boost_asio_yield.h" +// re-include our assert to clobber the system one; fix dout: +#include "include/assert.h" #define dout_subsys ceph_subsys_rgw @@ -68,7 +69,7 @@ int RGWCompletionManager::get_next(void **user_info) Mutex::Locker l(lock); while (complete_reqs.empty()) { cond.Wait(lock); - if (going_down.read() != 0) { + if (going_down) { return -ECANCELED; } } @@ -94,7 +95,7 @@ void RGWCompletionManager::go_down() for (auto cn : cns) { cn->unregister(); } - going_down.set(1); + going_down = true; cond.Signal(); } @@ -460,7 +461,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks) bool canceled = false; // set on going_down RGWCoroutinesEnv env; - uint64_t run_context = run_context_count.inc(); + uint64_t run_context = ++run_context_count; lock.get_write(); set<RGWCoroutinesStack *>& context_stacks = run_contexts[run_context]; @@ -475,7 +476,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks) env.manager = this; env.scheduled_stacks = &scheduled_stacks; - for (list<RGWCoroutinesStack *>::iterator iter = scheduled_stacks.begin(); iter != scheduled_stacks.end() && !going_down.read();) { + for (list<RGWCoroutinesStack *>::iterator iter = scheduled_stacks.begin(); iter != scheduled_stacks.end() && !going_down;) { lock.get_write(); RGWCoroutinesStack *stack = *iter; @@ -566,7 +567,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks) if (ret < 0) { ldout(cct, 0) << "ERROR: failed to clone shard, completion_mgr.get_next() returned ret=" << ret << dendl; } - if (going_down.read() > 0) { + if (going_down) { ldout(cct, 5) << __func__ << "(): was stopped, exiting" << dendl; ret = -ECANCELED; canceled = true; @@ -585,7 +586,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks) } lock.get_write(); - if (!context_stacks.empty() && !going_down.read()) { + if (!context_stacks.empty() && !going_down) { JSONFormatter formatter(true); 
formatter.open_array_section("context_stacks"); for (auto& s : context_stacks) { @@ -595,7 +596,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks) lderr(cct) << __func__ << "(): ERROR: deadlock detected, dumping remaining coroutines:\n"; formatter.flush(*_dout); *_dout << dendl; - assert(context_stacks.empty() || going_down.read()); // assert on deadlock + assert(context_stacks.empty() || going_down); // assert on deadlock } for (auto stack : context_stacks) { diff --git a/src/rgw/rgw_coroutine.h b/src/rgw/rgw_coroutine.h index 821ff55889a..66afa74b375 100644 --- a/src/rgw/rgw_coroutine.h +++ b/src/rgw/rgw_coroutine.h @@ -22,6 +22,8 @@ #include "rgw_common.h" #include "rgw_boost_asio_coroutine.h" +#include <atomic> + #define RGW_ASYNC_OPS_MGR_WINDOW 100 class RGWCoroutinesStack; @@ -39,7 +41,7 @@ class RGWCompletionManager : public RefCountedObject { SafeTimer timer; - atomic_t going_down; + std::atomic<bool> going_down = { false }; map<void *, void *> waiters; @@ -506,9 +508,9 @@ public: class RGWCoroutinesManager { CephContext *cct; - atomic_t going_down; + std::atomic<bool> going_down = { false }; - atomic64_t run_context_count; + std::atomic<int64_t> run_context_count = { 0 }; map<uint64_t, set<RGWCoroutinesStack *> > run_contexts; RWLock lock; @@ -542,7 +544,8 @@ public: int run(list<RGWCoroutinesStack *>& ops); int run(RGWCoroutine *op); void stop() { - if (going_down.inc() == 1) { + bool expected = false; + if (going_down.compare_exchange_strong(expected, true)) { completion_mgr->go_down(); } } diff --git a/src/rgw/rgw_cors_s3.h b/src/rgw/rgw_cors_s3.h index cad423c10a2..1e60e651512 100644 --- a/src/rgw/rgw_cors_s3.h +++ b/src/rgw/rgw_cors_s3.h @@ -18,15 +18,12 @@ #include <map> #include <string> #include <iosfwd> -#include <expat.h> #include <include/types.h> #include <common/Formatter.h> #include "rgw_xml.h" #include "rgw_cors.h" -using namespace std; - class RGWCORSRule_S3 : public RGWCORSRule, public XMLObj { public: diff --git 
a/src/rgw/rgw_cors_swift.h b/src/rgw/rgw_cors_swift.h index 6aef5e13561..d2516dd0357 100644 --- a/src/rgw/rgw_cors_swift.h +++ b/src/rgw/rgw_cors_swift.h @@ -23,8 +23,6 @@ #include "rgw_cors.h" -using namespace std; - class RGWCORSConfiguration_SWIFT : public RGWCORSConfiguration { public: diff --git a/src/rgw/rgw_cr_rados.cc b/src/rgw/rgw_cr_rados.cc index 489ef589172..0e0be84c82c 100644 --- a/src/rgw/rgw_cr_rados.cc +++ b/src/rgw/rgw_cr_rados.cc @@ -65,7 +65,7 @@ void RGWAsyncRadosProcessor::start() { } void RGWAsyncRadosProcessor::stop() { - going_down.set(1); + going_down = true; m_tp.drain(&req_wq); m_tp.stop(); for (auto iter = m_req_queue.begin(); iter != m_req_queue.end(); ++iter) { @@ -116,14 +116,14 @@ int RGWSimpleRadosReadAttrsCR::request_complete() int RGWAsyncPutSystemObj::_send_request() { - return store->put_system_obj_data(NULL, obj, bl, -1, exclusive); + return store->put_system_obj_data(NULL, obj, bl, -1, exclusive, objv_tracker); } RGWAsyncPutSystemObj::RGWAsyncPutSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store, - const rgw_raw_obj& _obj, bool _exclusive, - bufferlist& _bl) : RGWAsyncRadosRequest(caller, cn), store(_store), - obj(_obj), exclusive(_exclusive), - bl(_bl) + RGWObjVersionTracker *_objv_tracker, rgw_raw_obj& _obj, + bool _exclusive, bufferlist& _bl) + : RGWAsyncRadosRequest(caller, cn), store(_store), objv_tracker(_objv_tracker), + obj(_obj), exclusive(_exclusive), bl(_bl) { } @@ -315,6 +315,40 @@ int RGWRadosRemoveOmapKeysCR::send_request() { return ref.ioctx.aio_operate(ref.oid, cn->completion(), &op); } +RGWRadosRemoveCR::RGWRadosRemoveCR(RGWRados *store, const rgw_raw_obj& obj) + : RGWSimpleCoroutine(store->ctx()), store(store), obj(obj) +{ + set_description() << "remove dest=" << obj; +} + +int RGWRadosRemoveCR::send_request() +{ + auto rados = store->get_rados_handle(); + int r = rados->ioctx_create(obj.pool.name.c_str(), ioctx); + if (r < 0) { + lderr(cct) << "ERROR: failed to open pool (" << 
obj.pool.name << ") ret=" << r << dendl; + return r; + } + ioctx.locator_set_key(obj.loc); + + set_status() << "send request"; + + librados::ObjectWriteOperation op; + op.remove(); + + cn = stack->create_completion_notifier(); + return ioctx.aio_operate(obj.oid, cn->completion(), &op); +} + +int RGWRadosRemoveCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, const rgw_raw_obj& _obj, const string& _lock_name, @@ -507,7 +541,6 @@ int RGWAsyncFetchRemoteObj::_send_request() &key.instance, /* string *version_id, */ NULL, /* string *ptag, */ NULL, /* string *petag, */ - NULL, /* struct rgw_err *err, */ NULL, /* void (*progress_cb)(off_t, void *), */ NULL); /* void *progress_data*); */ @@ -627,7 +660,7 @@ int RGWContinuousLeaseCR::operate() return set_cr_done(); } reenter(this) { - while (!going_down.read()) { + while (!going_down) { yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, interval)); caller->set_sleeping(false); /* will only be relevant when we return, that's why we can do it early */ @@ -722,6 +755,29 @@ int RGWRadosTimelogTrimCR::request_complete() return r; } + +RGWSyncLogTrimCR::RGWSyncLogTrimCR(RGWRados *store, const std::string& oid, + const std::string& to_marker, + std::string *last_trim_marker) + : RGWRadosTimelogTrimCR(store, oid, real_time{}, real_time{}, + std::string{}, to_marker), + cct(store->ctx()), last_trim_marker(last_trim_marker) +{ +} + +int RGWSyncLogTrimCR::request_complete() +{ + int r = RGWRadosTimelogTrimCR::request_complete(); + if (r < 0 && r != -ENODATA) { + return r; + } + if (*last_trim_marker < to_marker) { + *last_trim_marker = to_marker; + } + return 0; +} + + int RGWAsyncStatObj::_send_request() { rgw_raw_obj raw_obj; diff --git a/src/rgw/rgw_cr_rados.h b/src/rgw/rgw_cr_rados.h index 
8496201e0cd..a892b2a6db0 100644 --- a/src/rgw/rgw_cr_rados.h +++ b/src/rgw/rgw_cr_rados.h @@ -1,11 +1,15 @@ #ifndef CEPH_RGW_CR_RADOS_H #define CEPH_RGW_CR_RADOS_H +#include <boost/intrusive_ptr.hpp> +#include "include/assert.h" #include "rgw_coroutine.h" #include "rgw_rados.h" #include "common/WorkQueue.h" #include "common/Throttle.h" +#include <atomic> + class RGWAsyncRadosRequest : public RefCountedObject { RGWCoroutine *caller; RGWAioCompletionNotifier *notifier; @@ -57,7 +61,7 @@ public: class RGWAsyncRadosProcessor { deque<RGWAsyncRadosRequest *> m_req_queue; - atomic_t going_down; + std::atomic<bool> going_down = { false }; protected: RGWRados *store; ThreadPool m_tp; @@ -91,7 +95,7 @@ public: void queue(RGWAsyncRadosRequest *req); bool is_going_down() { - return (going_down.read() != 0); + return going_down; } }; @@ -117,6 +121,7 @@ public: class RGWAsyncPutSystemObj : public RGWAsyncRadosRequest { RGWRados *store; + RGWObjVersionTracker *objv_tracker; rgw_raw_obj obj; bool exclusive; bufferlist bl; @@ -125,8 +130,8 @@ protected: int _send_request() override; public: RGWAsyncPutSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store, - const rgw_raw_obj& _obj, bool _exclusive, - bufferlist& _bl); + RGWObjVersionTracker *_objv_tracker, rgw_raw_obj& _obj, + bool _exclusive, bufferlist& _bl); }; class RGWAsyncPutSystemObjAttrs : public RGWAsyncRadosRequest { @@ -187,16 +192,18 @@ class RGWSimpleRadosReadCR : public RGWSimpleCoroutine { T *result; /// on ENOENT, call handle_data() with an empty object instead of failing const bool empty_on_enoent; + RGWObjVersionTracker *objv_tracker; RGWAsyncGetSystemObj *req{nullptr}; public: RGWSimpleRadosReadCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, const rgw_raw_obj& _obj, - T *_result, bool empty_on_enoent = true) + T *_result, bool empty_on_enoent = true, + RGWObjVersionTracker *objv_tracker = nullptr) : RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados), store(_store), 
obj_ctx(store), obj(_obj), result(_result), - empty_on_enoent(empty_on_enoent) {} + empty_on_enoent(empty_on_enoent), objv_tracker(objv_tracker) {} ~RGWSimpleRadosReadCR() override { request_cleanup(); } @@ -220,7 +227,7 @@ template <class T> int RGWSimpleRadosReadCR<T>::send_request() { req = new RGWAsyncGetSystemObj(this, stack->create_completion_notifier(), - store, &obj_ctx, NULL, + store, &obj_ctx, objv_tracker, obj, &bl, 0, -1); if (pattrs) { @@ -303,17 +310,16 @@ class RGWSimpleRadosWriteCR : public RGWSimpleCoroutine { bufferlist bl; rgw_raw_obj obj; + RGWObjVersionTracker *objv_tracker; - RGWAsyncPutSystemObj *req; + RGWAsyncPutSystemObj *req{nullptr}; public: RGWSimpleRadosWriteCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, const rgw_raw_obj& _obj, - const T& _data) : RGWSimpleCoroutine(_store->ctx()), - async_rados(_async_rados), - store(_store), - obj(_obj), - req(NULL) { + const T& _data, RGWObjVersionTracker *objv_tracker = nullptr) + : RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados), + store(_store), obj(_obj), objv_tracker(objv_tracker) { ::encode(_data, bl); } @@ -330,7 +336,7 @@ public: int send_request() override { req = new RGWAsyncPutSystemObj(this, stack->create_completion_notifier(), - store, obj, false, bl); + store, objv_tracker, obj, false, bl); async_rados->queue(req); return 0; } @@ -462,6 +468,19 @@ public: } }; +class RGWRadosRemoveCR : public RGWSimpleCoroutine { + RGWRados *store; + librados::IoCtx ioctx; + const rgw_raw_obj obj; + boost::intrusive_ptr<RGWAioCompletionNotifier> cn; + +public: + RGWRadosRemoveCR(RGWRados *store, const rgw_raw_obj& obj); + + int send_request(); + int request_complete(); +}; + class RGWSimpleRadosLockCR : public RGWSimpleCoroutine { RGWAsyncRadosProcessor *async_rados; RGWRados *store; @@ -1014,7 +1033,7 @@ class RGWContinuousLeaseCR : public RGWCoroutine { int interval; Mutex lock; - atomic_t going_down; + std::atomic<bool> going_down = { false }; bool locked{false}; 
RGWCoroutine *caller; @@ -1044,7 +1063,7 @@ public: } void go_down() { - going_down.set(1); + going_down = true; wakeup(); } @@ -1091,6 +1110,16 @@ class RGWRadosTimelogTrimCR : public RGWSimpleCoroutine { int request_complete() override; }; +// wrapper to update last_trim_marker on success +class RGWSyncLogTrimCR : public RGWRadosTimelogTrimCR { + CephContext *cct; + std::string *last_trim_marker; + public: + RGWSyncLogTrimCR(RGWRados *store, const std::string& oid, + const std::string& to_marker, std::string *last_trim_marker); + int request_complete() override; +}; + class RGWAsyncStatObj : public RGWAsyncRadosRequest { RGWRados *store; RGWBucketInfo bucket_info; diff --git a/src/rgw/rgw_crypt.cc b/src/rgw/rgw_crypt.cc index cf48e27751f..0d57f4da270 100644 --- a/src/rgw/rgw_crypt.cc +++ b/src/rgw/rgw_crypt.cc @@ -1053,7 +1053,9 @@ static const crypt_option_names crypt_options[] = { static boost::string_ref get_crypt_attribute( RGWEnv* env, - map<string, post_form_part, const ltstr_nocase>* parts, + std::map<std::string, + RGWPostObj_ObjStore::post_form_part, + const ltstr_nocase>* parts, crypt_option_e option) { static_assert( @@ -1079,10 +1081,12 @@ static boost::string_ref get_crypt_attribute( int rgw_s3_prepare_encrypt(struct req_state* s, - map<string, bufferlist>& attrs, - map<string, post_form_part, const ltstr_nocase>* parts, - std::unique_ptr<BlockCrypt>* block_crypt, - std::map<std::string, std::string>& crypt_http_responses) + std::map<std::string, ceph::bufferlist>& attrs, + std::map<std::string, + RGWPostObj_ObjStore::post_form_part, + const ltstr_nocase>* parts, + std::unique_ptr<BlockCrypt>* block_crypt, + std::map<std::string, std::string>& crypt_http_responses) { int res = 0; crypt_http_responses.clear(); diff --git a/src/rgw/rgw_crypt.h b/src/rgw/rgw_crypt.h index a6b7df0a42a..1774983758d 100644 --- a/src/rgw/rgw_crypt.h +++ b/src/rgw/rgw_crypt.h @@ -7,6 +7,7 @@ #define CEPH_RGW_CRYPT_H #include <rgw/rgw_op.h> +#include <rgw/rgw_rest.h> #include 
<rgw/rgw_rest_s3.h> #include <boost/utility/string_ref.hpp> @@ -136,14 +137,18 @@ public: int rgw_s3_prepare_encrypt(struct req_state* s, - map<string, bufferlist>& attrs, - map<string, post_form_part, const ltstr_nocase>* parts, - std::unique_ptr<BlockCrypt>* block_crypt, - std::map<std::string, std::string>& crypt_http_responses); + std::map<std::string, ceph::bufferlist>& attrs, + std::map<std::string, + RGWPostObj_ObjStore::post_form_part, + const ltstr_nocase>* parts, + std::unique_ptr<BlockCrypt>* block_crypt, + std::map<std::string, + std::string>& crypt_http_responses); int rgw_s3_prepare_decrypt(struct req_state* s, - map<string, bufferlist>& attrs, - std::unique_ptr<BlockCrypt>* block_crypt, - std::map<std::string, std::string>& crypt_http_responses); + std::map<std::string, ceph::bufferlist>& attrs, + std::unique_ptr<BlockCrypt>* block_crypt, + std::map<std::string, + std::string>& crypt_http_responses); #endif diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc index cbe3a91d8e1..e3f3b079365 100644 --- a/src/rgw/rgw_data_sync.cc +++ b/src/rgw/rgw_data_sync.cc @@ -456,22 +456,25 @@ bool RGWListRemoteDataLogCR::spawn_next() { } class RGWInitDataSyncStatusCoroutine : public RGWCoroutine { + static constexpr uint32_t lock_duration = 30; RGWDataSyncEnv *sync_env; - RGWRados *store; + const rgw_pool& pool; + const uint32_t num_shards; string sync_status_oid; string lock_name; string cookie; - rgw_data_sync_info status; + rgw_data_sync_status *status; map<int, RGWDataChangesLogInfo> shards_info; public: - RGWInitDataSyncStatusCoroutine(RGWDataSyncEnv *_sync_env, - uint32_t _num_shards) : RGWCoroutine(_sync_env->cct), - sync_env(_sync_env), store(sync_env->store) { + RGWInitDataSyncStatusCoroutine(RGWDataSyncEnv *_sync_env, uint32_t num_shards, + rgw_data_sync_status *status) + : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), store(sync_env->store), + pool(store->get_zone_params().log_pool), + num_shards(num_shards), status(status) { lock_name 
= "sync_lock"; - status.num_shards = _num_shards; #define COOKIE_LEN 16 char buf[COOKIE_LEN + 1]; @@ -485,33 +488,32 @@ public: int operate() override { int ret; reenter(this) { - yield { - uint32_t lock_duration = 30; - call(new RGWSimpleRadosLockCR(sync_env->async_rados, store, - rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid), - lock_name, cookie, lock_duration)); - if (retcode < 0) { - ldout(cct, 0) << "ERROR: failed to take a lock on " << sync_status_oid << dendl; - return set_cr_error(retcode); - } + using LockCR = RGWSimpleRadosLockCR; + yield call(new LockCR(sync_env->async_rados, store, + rgw_raw_obj{pool, sync_status_oid}, + lock_name, cookie, lock_duration)); + if (retcode < 0) { + ldout(cct, 0) << "ERROR: failed to take a lock on " << sync_status_oid << dendl; + return set_cr_error(retcode); } - yield { - call(new RGWSimpleRadosWriteCR<rgw_data_sync_info>(sync_env->async_rados, - store, - rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid), - status)); - } - yield { /* take lock again, we just recreated the object */ - uint32_t lock_duration = 30; - call(new RGWSimpleRadosLockCR(sync_env->async_rados, - store, - rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid), - lock_name, cookie, lock_duration)); - if (retcode < 0) { - ldout(cct, 0) << "ERROR: failed to take a lock on " << sync_status_oid << dendl; - return set_cr_error(retcode); - } + using WriteInfoCR = RGWSimpleRadosWriteCR<rgw_data_sync_info>; + yield call(new WriteInfoCR(sync_env->async_rados, store, + rgw_raw_obj{pool, sync_status_oid}, + status->sync_info)); + if (retcode < 0) { + ldout(cct, 0) << "ERROR: failed to write sync status info with " << retcode << dendl; + return set_cr_error(retcode); + } + + /* take lock again, we just recreated the object */ + yield call(new LockCR(sync_env->async_rados, store, + rgw_raw_obj{pool, sync_status_oid}, + lock_name, cookie, lock_duration)); + if (retcode < 0) { + ldout(cct, 0) << "ERROR: failed to take a lock 
on " << sync_status_oid << dendl; + return set_cr_error(retcode); } + /* fetch current position in logs */ yield { RGWRESTConn *conn = store->get_zone_conn_by_id(sync_env->source_zone); @@ -519,46 +521,48 @@ public: ldout(cct, 0) << "ERROR: connection to zone " << sync_env->source_zone << " does not exist!" << dendl; return set_cr_error(-EIO); } - for (int i = 0; i < (int)status.num_shards; i++) { + for (uint32_t i = 0; i < num_shards; i++) { spawn(new RGWReadRemoteDataLogShardInfoCR(sync_env, i, &shards_info[i]), true); - } + } } while (collect(&ret, NULL)) { - if (ret < 0) { - return set_state(RGWCoroutine_Error); - } + if (ret < 0) { + ldout(cct, 0) << "ERROR: failed to read remote data log shards" << dendl; + return set_state(RGWCoroutine_Error); + } yield; } yield { - for (int i = 0; i < (int)status.num_shards; i++) { - rgw_data_sync_marker marker; + for (uint32_t i = 0; i < num_shards; i++) { RGWDataChangesLogInfo& info = shards_info[i]; - marker.next_step_marker = info.marker; - marker.timestamp = info.last_update; - spawn(new RGWSimpleRadosWriteCR<rgw_data_sync_marker>(sync_env->async_rados, store, - rgw_raw_obj(store->get_zone_params().log_pool, RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, i)), - marker), true); + auto& marker = status->sync_markers[i]; + marker.next_step_marker = info.marker; + marker.timestamp = info.last_update; + const auto& oid = RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, i); + using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_data_sync_marker>; + spawn(new WriteMarkerCR(sync_env->async_rados, store, + rgw_raw_obj{pool, oid}, marker), true); } } - yield { - status.state = rgw_data_sync_info::StateBuildingFullSyncMaps; - call(new RGWSimpleRadosWriteCR<rgw_data_sync_info>(sync_env->async_rados, store, - rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid), - status)); - } - yield { /* unlock */ - call(new RGWSimpleRadosUnlockCR(sync_env->async_rados, - store, - 
rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid), - lock_name, cookie)); - } while (collect(&ret, NULL)) { - if (ret < 0) { - return set_state(RGWCoroutine_Error); - } + if (ret < 0) { + ldout(cct, 0) << "ERROR: failed to write data sync status markers" << dendl; + return set_state(RGWCoroutine_Error); + } yield; } - drain_all(); + + status->sync_info.state = rgw_data_sync_info::StateBuildingFullSyncMaps; + yield call(new WriteInfoCR(sync_env->async_rados, store, + rgw_raw_obj{pool, sync_status_oid}, + status->sync_info)); + if (retcode < 0) { + ldout(cct, 0) << "ERROR: failed to write sync status info with " << retcode << dendl; + return set_cr_error(retcode); + } + yield call(new RGWSimpleRadosUnlockCR(sync_env->async_rados, store, + rgw_raw_obj{pool, sync_status_oid}, + lock_name, cookie)); return set_cr_done(); } return 0; @@ -666,6 +670,7 @@ int RGWRemoteDataLog::read_sync_status(rgw_data_sync_status *sync_status) int RGWRemoteDataLog::init_sync_status(int num_shards) { + rgw_data_sync_status sync_status; RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry()); RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr()); int ret = http_manager.set_threaded(); @@ -675,7 +680,7 @@ int RGWRemoteDataLog::init_sync_status(int num_shards) } RGWDataSyncEnv sync_env_local = sync_env; sync_env_local.http_manager = &http_manager; - ret = crs.run(new RGWInitDataSyncStatusCoroutine(&sync_env_local, num_shards)); + ret = crs.run(new RGWInitDataSyncStatusCoroutine(&sync_env_local, num_shards, &sync_status)); http_manager.stop(); return ret; } @@ -1240,11 +1245,6 @@ public: int incremental_sync() { reenter(&incremental_cr) { - error_repo = new RGWOmapAppend(sync_env->async_rados, sync_env->store, - rgw_raw_obj(pool, error_oid), - 1 /* no buffer */); - error_repo->get(); - spawn(error_repo, false); yield init_lease_cr(); while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -1256,6 +1256,11 @@ public: yield; } set_status("lease 
acquired"); + error_repo = new RGWOmapAppend(sync_env->async_rados, sync_env->store, + rgw_raw_obj(pool, error_oid), + 1 /* no buffer */); + error_repo->get(); + spawn(error_repo, false); logger.log("inc sync"); set_marker_tracker(new RGWDataSyncShardMarkerTrack(sync_env, status_oid, sync_marker)); do { @@ -1458,20 +1463,12 @@ public: /* state: init status */ if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateInit) { ldout(sync_env->cct, 20) << __func__ << "(): init" << dendl; - yield call(new RGWInitDataSyncStatusCoroutine(sync_env, sync_status.sync_info.num_shards)); + yield call(new RGWInitDataSyncStatusCoroutine(sync_env, num_shards, &sync_status)); if (retcode < 0) { ldout(sync_env->cct, 0) << "ERROR: failed to init sync, retcode=" << retcode << dendl; return set_cr_error(retcode); } - sync_status.sync_info.num_shards = num_shards; - sync_status.sync_info.state = rgw_data_sync_info::StateBuildingFullSyncMaps; - /* update new state */ - yield call(set_sync_info_cr()); - - if (retcode < 0) { - ldout(sync_env->cct, 0) << "ERROR: failed to write sync status, retcode=" << retcode << dendl; - return set_cr_error(retcode); - } + // sets state = StateBuildingFullSyncMaps *reset_backoff = true; } @@ -2894,6 +2891,7 @@ string RGWBucketSyncStatusManager::status_oid(const string& source_zone, } +// TODO: move into rgw_data_sync_trim.cc #undef dout_prefix #define dout_prefix (*_dout << "data trim: ") @@ -2939,28 +2937,7 @@ void take_min_markers(IterIn first, IterIn last, IterOut dest) } } -// wrapper to update last_trim_marker on success -class LastTimelogTrimCR : public RGWRadosTimelogTrimCR { - CephContext *cct; - std::string *last_trim_marker; - public: - LastTimelogTrimCR(RGWRados *store, const std::string& oid, - const std::string& to_marker, std::string *last_trim_marker) - : RGWRadosTimelogTrimCR(store, oid, real_time{}, real_time{}, - std::string{}, to_marker), - cct(store->ctx()), last_trim_marker(last_trim_marker) - {} - 
int request_complete() override { - int r = RGWRadosTimelogTrimCR::request_complete(); - if (r < 0 && r != -ENODATA) { - ldout(cct, 1) << "failed to trim datalog: " << cpp_strerror(r) << dendl; - return r; - } - ldout(cct, 10) << "datalog trimmed to marker " << to_marker << dendl; - *last_trim_marker = to_marker; - return 0; - } -}; +} // anonymous namespace class DataLogTrimCR : public RGWCoroutine { RGWRados *store; @@ -3039,7 +3016,7 @@ int DataLogTrimCR::operate() ldout(cct, 10) << "trimming log shard " << i << " at marker=" << stable << " last_trim=" << last_trim[i] << dendl; - using TrimCR = LastTimelogTrimCR; + using TrimCR = RGWSyncLogTrimCR; spawn(new TrimCR(store, store->data_log->get_oid(i), stable, &last_trim[i]), true); @@ -3103,8 +3080,6 @@ int DataLogTrimPollCR::operate() return 0; } -} // anonymous namespace - RGWCoroutine* create_data_log_trim_cr(RGWRados *store, RGWHTTPManager *http, int num_shards, utime_t interval) diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc index be3d356ec41..f79dd0a78b7 100644 --- a/src/rgw/rgw_file.cc +++ b/src/rgw/rgw_file.cc @@ -27,6 +27,8 @@ #include "rgw_file.h" #include "rgw_lib_frontend.h" +#include <atomic> + #define dout_subsys ceph_subsys_rgw using namespace rgw; @@ -37,7 +39,7 @@ namespace rgw { const string RGWFileHandle::root_name = "/"; - atomic<uint32_t> RGWLibFS::fs_inst_counter; + std::atomic<uint32_t> RGWLibFS::fs_inst_counter; uint32_t RGWLibFS::write_completion_interval_s = 10; @@ -1148,6 +1150,8 @@ namespace rgw { int rc = write_finish(FLAG_LOCKED); flags &= ~FLAG_OPEN; + flags &= ~FLAG_STATELESS_OPEN; + return rc; } /* RGWFileHandle::close */ @@ -1417,7 +1421,6 @@ int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags) { RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private); fs->close(); - fs->rele(); return 0; } diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h index 5d964f9c2aa..1edc37a9f60 100644 --- a/src/rgw/rgw_file.h +++ b/src/rgw/rgw_file.h @@ -282,7 +282,7 @@ namespace rgw { 
variant_type = directory(); flags |= FLAG_BUCKET; } else { - bucket = (parent->flags & FLAG_BUCKET) ? parent + bucket = parent->is_bucket() ? parent : parent->bucket; if (flags & FLAG_DIRECTORY) { fh.fh_type = RGW_FS_TYPE_DIRECTORY; @@ -410,7 +410,7 @@ namespace rgw { const std::string& bucket_name() const { if (is_root()) return root_name; - if (flags & FLAG_BUCKET) + if (is_bucket()) return name; return bucket->object_name(); } @@ -506,7 +506,7 @@ namespace rgw { int open(uint32_t gsh_flags) { lock_guard guard(mtx); - if (! (flags & FLAG_OPEN)) { + if (! is_open()) { if (gsh_flags & RGW_OPEN_FLAG_V3) { flags |= FLAG_STATELESS_OPEN; } @@ -745,7 +745,7 @@ namespace rgw { RGWUserInfo user; RGWAccessKey key; // XXXX acc_key - static atomic<uint32_t> fs_inst_counter; + static std::atomic<uint32_t> fs_inst_counter; static uint32_t write_completion_interval_s; std::string fsid; @@ -778,7 +778,7 @@ namespace rgw { } void operator()() { - rgw_fh.write_finish(); + rgw_fh.close(); /* will finish in-progress write */ rgw_fh.get_fs()->unref(&rgw_fh); } }; @@ -1449,9 +1449,9 @@ public: op = this; } - virtual bool only_bucket() override { return false; } + bool only_bucket() override { return false; } - virtual int op_init() override { + int op_init() override { // assign store, s, and dialect_handler RGWObjectCtx* rados_ctx = static_cast<RGWObjectCtx*>(get_state()->obj_ctx); @@ -1462,7 +1462,7 @@ public: return 0; } - virtual int header_init() override { + int header_init() override { struct req_state* s = get_state(); s->info.method = "GET"; s->op = OP_GET; @@ -1484,12 +1484,12 @@ public: return 0; } - virtual int get_params() override { + int get_params() override { max = default_max; return 0; } - virtual void send_response() override { + void send_response() override { valid = true; if ((objs.size() > 1) || (! 
objs.empty() && diff --git a/src/rgw/rgw_gc.cc b/src/rgw/rgw_gc.cc index c46f693daea..8fb461292dc 100644 --- a/src/rgw/rgw_gc.cc +++ b/src/rgw/rgw_gc.cc @@ -253,7 +253,7 @@ int RGWGC::process() bool RGWGC::going_down() { - return (down_flag.read() != 0); + return down_flag; } void RGWGC::start_processor() @@ -264,7 +264,7 @@ void RGWGC::start_processor() void RGWGC::stop_processor() { - down_flag.set(1); + down_flag = true; if (worker) { worker->stop(); worker->join(); diff --git a/src/rgw/rgw_gc.h b/src/rgw/rgw_gc.h index ca48a6e75e5..491796b50e8 100644 --- a/src/rgw/rgw_gc.h +++ b/src/rgw/rgw_gc.h @@ -6,7 +6,6 @@ #include "include/types.h" -#include "include/atomic.h" #include "include/rados/librados.hpp" #include "common/Mutex.h" #include "common/Cond.h" @@ -15,12 +14,14 @@ #include "rgw_rados.h" #include "cls/rgw/cls_rgw_types.h" +#include <atomic> + class RGWGC { CephContext *cct; RGWRados *store; int max_objs; string *obj_names; - atomic_t down_flag; + std::atomic<bool> down_flag = { false }; int tag_index(const string& tag); diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc index 070110e5099..81c4c6ef6bc 100644 --- a/src/rgw/rgw_http_client.cc +++ b/src/rgw/rgw_http_client.cc @@ -16,6 +16,8 @@ #include "rgw_coroutine.h" +#include <atomic> + #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw @@ -24,7 +26,7 @@ struct rgw_http_req_data : public RefCountedObject { curl_slist *h; uint64_t id; int ret; - atomic_t done; + std::atomic<bool> done = { false }; RGWHTTPClient *client; void *user_info; bool registered; @@ -58,12 +60,12 @@ struct rgw_http_req_data : public RefCountedObject { easy_handle = NULL; h = NULL; - done.set(1); + done = true; cond.Signal(); } bool is_done() { - return done.read() != 0; + return done; } int get_retcode() { @@ -900,14 +902,14 @@ int RGWHTTPManager::set_threaded() void RGWHTTPManager::stop() { - if (is_stopped.read()) { + if (is_stopped) { return; } - is_stopped.set(1); + is_stopped = true; if 
(is_threaded) { - going_down.set(1); + going_down = true; signal_thread(); reqs_thread->join(); delete reqs_thread; @@ -935,7 +937,7 @@ void *RGWHTTPManager::reqs_thread_entry() ldout(cct, 20) << __func__ << ": start" << dendl; - while (!going_down.read()) { + while (!going_down) { int ret = do_curl_wait(cct, (CURLM *)multi_handle, thread_pipe[0]); if (ret < 0) { dout(0) << "ERROR: do_curl_wait() returned: " << ret << dendl; diff --git a/src/rgw/rgw_http_client.h b/src/rgw/rgw_http_client.h index 4266b7b50ba..cbe4f3d0312 100644 --- a/src/rgw/rgw_http_client.h +++ b/src/rgw/rgw_http_client.h @@ -6,10 +6,11 @@ #include "common/RWLock.h" #include "common/Cond.h" -#include "include/atomic.h" #include "rgw_common.h" #include "rgw_string.h" +#include <atomic> + using param_pair_t = pair<string, string>; using param_vec_t = vector<param_pair_t>; @@ -33,7 +34,7 @@ class RGWHTTPClient string last_url; bool verify_ssl; // Do not validate self signed certificates, default to false - atomic_t stopped; + std::atomic<unsigned> stopped { 0 }; protected: CephContext *cct; @@ -219,8 +220,8 @@ class RGWHTTPManager { RGWCompletionManager *completion_mgr; void *multi_handle; bool is_threaded; - atomic_t going_down; - atomic_t is_stopped; + std::atomic<unsigned> going_down { 0 }; + std::atomic<unsigned> is_stopped { 0 }; RWLock reqs_lock; map<uint64_t, rgw_http_req_data *> reqs; diff --git a/src/rgw/rgw_http_errors.h b/src/rgw/rgw_http_errors.h index a0423bb48fd..e2aec310c88 100644 --- a/src/rgw/rgw_http_errors.h +++ b/src/rgw/rgw_http_errors.h @@ -6,144 +6,11 @@ #include "rgw_common.h" -struct rgw_http_errors { - int err_no; - int http_ret; - const char *s3_code; -}; +typedef const std::map<int,const std::pair<int, const char*>> rgw_http_errors; -const static struct rgw_http_errors RGW_HTTP_ERRORS[] = { - { 0, 200, "" }, - { STATUS_CREATED, 201, "Created" }, - { STATUS_ACCEPTED, 202, "Accepted" }, - { STATUS_NO_CONTENT, 204, "NoContent" }, - { STATUS_PARTIAL_CONTENT, 206, "" }, - { 
ERR_PERMANENT_REDIRECT, 301, "PermanentRedirect" }, - { ERR_WEBSITE_REDIRECT, 301, "WebsiteRedirect" }, - { STATUS_REDIRECT, 303, "" }, - { ERR_NOT_MODIFIED, 304, "NotModified" }, - { EINVAL, 400, "InvalidArgument" }, - { ERR_INVALID_REQUEST, 400, "InvalidRequest" }, - { ERR_INVALID_DIGEST, 400, "InvalidDigest" }, - { ERR_BAD_DIGEST, 400, "BadDigest" }, - { ERR_INVALID_BUCKET_NAME, 400, "InvalidBucketName" }, - { ERR_INVALID_OBJECT_NAME, 400, "InvalidObjectName" }, - { ERR_UNRESOLVABLE_EMAIL, 400, "UnresolvableGrantByEmailAddress" }, - { ERR_INVALID_PART, 400, "InvalidPart" }, - { ERR_INVALID_PART_ORDER, 400, "InvalidPartOrder" }, - { ERR_REQUEST_TIMEOUT, 400, "RequestTimeout" }, - { ERR_TOO_LARGE, 400, "EntityTooLarge" }, - { ERR_TOO_SMALL, 400, "EntityTooSmall" }, - { ERR_TOO_MANY_BUCKETS, 400, "TooManyBuckets" }, - { ERR_MALFORMED_XML, 400, "MalformedXML" }, - { ERR_AMZ_CONTENT_SHA256_MISMATCH, 400, "XAmzContentSHA256Mismatch" }, - { ERR_MALFORMED_DOC, 400, "MalformedPolicyDocument" }, - { ERR_LENGTH_REQUIRED, 411, "MissingContentLength" }, - { EACCES, 403, "AccessDenied" }, - { EPERM, 403, "AccessDenied" }, - { ERR_SIGNATURE_NO_MATCH, 403, "SignatureDoesNotMatch" }, - { ERR_INVALID_ACCESS_KEY, 403, "InvalidAccessKeyId" }, - { ERR_USER_SUSPENDED, 403, "UserSuspended" }, - { ERR_REQUEST_TIME_SKEWED, 403, "RequestTimeTooSkewed" }, - { ERR_QUOTA_EXCEEDED, 403, "QuotaExceeded" }, - { ENOENT, 404, "NoSuchKey" }, - { ERR_NO_SUCH_BUCKET, 404, "NoSuchBucket" }, - { ERR_NO_SUCH_WEBSITE_CONFIGURATION, 404, "NoSuchWebsiteConfiguration" }, - { ERR_NO_SUCH_UPLOAD, 404, "NoSuchUpload" }, - { ERR_NOT_FOUND, 404, "Not Found"}, - { ERR_NO_SUCH_LC, 404, "NoSuchLifecycleConfiguration"}, - { ERR_NO_ROLE_FOUND, 404, "NoSuchEntity"}, - { ERR_METHOD_NOT_ALLOWED, 405, "MethodNotAllowed" }, - { ETIMEDOUT, 408, "RequestTimeout" }, - { EEXIST, 409, "BucketAlreadyExists" }, - { ERR_USER_EXIST, 409, "UserAlreadyExists" }, - { ERR_EMAIL_EXIST, 409, "EmailExists" }, - { ERR_KEY_EXIST, 409, 
"KeyExists"}, - { ERR_ROLE_EXISTS, 409, "EntityAlreadyExists"}, - { ERR_DELETE_CONFLICT, 409, "DeleteConflict"}, - { ERR_INVALID_SECRET_KEY, 400, "InvalidSecretKey"}, - { ERR_INVALID_KEY_TYPE, 400, "InvalidKeyType"}, - { ERR_INVALID_CAP, 400, "InvalidCapability"}, - { ERR_INVALID_TENANT_NAME, 400, "InvalidTenantName" }, - { ENOTEMPTY, 409, "BucketNotEmpty" }, - { ERR_PRECONDITION_FAILED, 412, "PreconditionFailed" }, - { ERANGE, 416, "InvalidRange" }, - { ERR_UNPROCESSABLE_ENTITY, 422, "UnprocessableEntity" }, - { ERR_LOCKED, 423, "Locked" }, - { ERR_INTERNAL_ERROR, 500, "InternalError" }, - { ERR_NOT_IMPLEMENTED, 501, "NotImplemented" }, - { ERR_SERVICE_UNAVAILABLE, 503, "ServiceUnavailable"} -}; - -const static struct rgw_http_errors RGW_HTTP_SWIFT_ERRORS[] = { - { EACCES, 403, "AccessDenied" }, - { EPERM, 401, "AccessDenied" }, - { ERR_USER_SUSPENDED, 401, "UserSuspended" }, - { ERR_INVALID_UTF8, 412, "Invalid UTF8" }, - { ERR_BAD_URL, 412, "Bad URL" }, - { ERR_NOT_SLO_MANIFEST, 400, "Not an SLO manifest" }, - { ERR_QUOTA_EXCEEDED, 413, "QuotaExceeded" } -}; - -struct rgw_http_status_code { - int code; - const char *name; -}; - -const static struct rgw_http_status_code http_codes[] = { - { 100, "Continue" }, - { 200, "OK" }, - { 201, "Created" }, - { 202, "Accepted" }, - { 204, "No Content" }, - { 205, "Reset Content" }, - { 206, "Partial Content" }, - { 207, "Multi Status" }, - { 208, "Already Reported" }, - { 300, "Multiple Choices" }, - { 301, "Moved Permanently" }, - { 302, "Found" }, - { 303, "See Other" }, - { 304, "Not Modified" }, - { 305, "User Proxy" }, - { 306, "Switch Proxy" }, - { 307, "Temporary Redirect" }, - { 308, "Permanent Redirect" }, - { 400, "Bad Request" }, - { 401, "Unauthorized" }, - { 402, "Payment Required" }, - { 403, "Forbidden" }, - { 404, "Not Found" }, - { 405, "Method Not Allowed" }, - { 406, "Not Acceptable" }, - { 407, "Proxy Authentication Required" }, - { 408, "Request Timeout" }, - { 409, "Conflict" }, - { 410, "Gone" }, - { 
411, "Length Required" }, - { 412, "Precondition Failed" }, - { 413, "Request Entity Too Large" }, - { 414, "Request-URI Too Long" }, - { 415, "Unsupported Media Type" }, - { 416, "Requested Range Not Satisfiable" }, - { 417, "Expectation Failed" }, - { 422, "Unprocessable Entity" }, - { 500, "Internal Server Error" }, - { 501, "Not Implemented" }, - { 0, NULL }, -}; - -#define ARRAY_LEN(arr) (sizeof(arr) / sizeof(arr[0])) - -static inline const struct rgw_http_errors *search_err(int err_no, const struct rgw_http_errors *errs, int len) -{ - for (int i = 0; i < len; ++i, ++errs) { - if (err_no == errs->err_no) - return errs; - } - return NULL; -} +extern rgw_http_errors rgw_http_s3_errors; +extern rgw_http_errors rgw_http_swift_errors; static inline int rgw_http_error_to_errno(int http_err) { diff --git a/src/rgw/rgw_iam_policy.cc b/src/rgw/rgw_iam_policy.cc new file mode 100644 index 00000000000..c12e243f1e0 --- /dev/null +++ b/src/rgw/rgw_iam_policy.cc @@ -0,0 +1,1514 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + + +#include <cstring> +#include <regex> +#include <sstream> +#include <stack> +#include <utility> + +#include "rapidjson/reader.h" + +#include "rgw_auth.h" +#include "rgw_iam_policy.h" + +namespace { +constexpr int dout_subsys = ceph_subsys_rgw; +} + +using std::bitset; +using std::find; +using std::int64_t; +using std::move; +using std::pair; +using std::regex; +using std::regex_match; +using std::size_t; +using std::smatch; +using std::string; +using std::stringstream; +using std::ostream; +using std::uint16_t; +using std::uint64_t; +using std::unordered_map; + +using boost::container::flat_set; +using boost::none; +using boost::optional; + +using rapidjson::BaseReaderHandler; +using rapidjson::UTF8; +using rapidjson::SizeType; +using rapidjson::Reader; +using rapidjson::kParseCommentsFlag; +using rapidjson::kParseNumbersAsStringsFlag; +using rapidjson::StringStream; +using 
rapidjson::ParseResult; + +using rgw::auth::Principal; + +namespace rgw { +namespace IAM { +#include "rgw_iam_policy_keywords.frag.cc" + +struct actpair { + const char* name; + const uint64_t bit; +}; + +namespace { +optional<Partition> to_partition(const smatch::value_type& p, + bool wildcards) { + if (p == "aws") { + return Partition::aws; + } else if (p == "aws-cn") { + return Partition::aws_cn; + } else if (p == "aws-us-gov") { + return Partition::aws_us_gov; + } else if (p == "*" && wildcards) { + return Partition::wildcard; + } else { + return none; + } + + ceph_abort(); +} + +optional<Service> to_service(const smatch::value_type& s, + bool wildcards) { + static const unordered_map<string, Service> services = { + { "acm", Service::acm }, + { "apigateway", Service::apigateway }, + { "appstream", Service::appstream }, + { "artifact", Service::artifact }, + { "autoscaling", Service::autoscaling }, + { "aws-marketplace", Service::aws_marketplace }, + { "aws-marketplace-management", + Service::aws_marketplace_management }, + { "aws-portal", Service::aws_portal }, + { "cloudformation", Service::cloudformation }, + { "cloudfront", Service::cloudfront }, + { "cloudhsm", Service::cloudhsm }, + { "cloudsearch", Service::cloudsearch }, + { "cloudtrail", Service::cloudtrail }, + { "cloudwatch", Service::cloudwatch }, + { "codebuild", Service::codebuild }, + { "codecommit", Service::codecommit }, + { "codedeploy", Service::codedeploy }, + { "codepipeline", Service::codepipeline }, + { "cognito-identity", Service::cognito_identity }, + { "cognito-idp", Service::cognito_idp }, + { "cognito-sync", Service::cognito_sync }, + { "config", Service::config }, + { "datapipeline", Service::datapipeline }, + { "devicefarm", Service::devicefarm }, + { "directconnect", Service::directconnect }, + { "dms", Service::dms }, + { "ds", Service::ds }, + { "dynamodb", Service::dynamodb }, + { "ec2", Service::ec2 }, + { "ecr", Service::ecr }, + { "ecs", Service::ecs }, + { "elasticache", 
Service::elasticache }, + { "elasticbeanstalk", Service::elasticbeanstalk }, + { "elasticfilesystem", Service::elasticfilesystem }, + { "elasticloadbalancing", Service::elasticloadbalancing }, + { "elasticmapreduce", Service::elasticmapreduce }, + { "elastictranscoder", Service::elastictranscoder }, + { "es", Service::es }, + { "events", Service::events }, + { "firehose", Service::firehose }, + { "gamelift", Service::gamelift }, + { "glacier", Service::glacier }, + { "health", Service::health }, + { "iam", Service::iam }, + { "importexport", Service::importexport }, + { "inspector", Service::inspector }, + { "iot", Service::iot }, + { "kinesis", Service::kinesis }, + { "kinesisanalytics", Service::kinesisanalytics }, + { "kms", Service::kms }, + { "lambda", Service::lambda }, + { "lightsail", Service::lightsail }, + { "logs", Service::logs }, + { "machinelearning", Service::machinelearning }, + { "mobileanalytics", Service::mobileanalytics }, + { "mobilehub", Service::mobilehub }, + { "opsworks", Service::opsworks }, + { "opsworks-cm", Service::opsworks_cm }, + { "polly", Service::polly }, + { "rds", Service::rds }, + { "redshift", Service::redshift }, + { "route53", Service::route53 }, + { "route53domains", Service::route53domains }, + { "s3", Service::s3 }, + { "sdb", Service::sdb }, + { "servicecatalog", Service::servicecatalog }, + { "ses", Service::ses }, + { "sns", Service::sns }, + { "sqs", Service::sqs }, + { "ssm", Service::ssm }, + { "states", Service::states }, + { "storagegateway", Service::storagegateway }, + { "sts", Service::sts }, + { "support", Service::support }, + { "swf", Service::swf }, + { "trustedadvisor", Service::trustedadvisor }, + { "waf", Service::waf }, + { "workmail", Service::workmail }, + { "workspaces", Service::workspaces }}; + + if (wildcards && s == "*") { + return Service::wildcard; + } + + auto i = services.find(s); + if (i == services.end()) { + return none; + } else { + return i->second; + } +} +} + +ARN::ARN(const rgw_obj& 
o) + : partition(Partition::aws), + service(Service::s3), + region(), + account(o.bucket.tenant), + resource(o.bucket.name) +{ + resource.push_back('/'); + resource.append(o.key.name); +} + +ARN::ARN(const rgw_bucket& b) + : partition(Partition::aws), + service(Service::s3), + region(), + account(b.tenant), + resource(b.name) { } + +ARN::ARN(const rgw_bucket& b, const string& o) + : partition(Partition::aws), + service(Service::s3), + region(), + account(b.tenant), + resource(b.name) { + resource.push_back('/'); + resource.append(o); +} + +optional<ARN> ARN::parse(const string& s, bool wildcards) { + static const regex rx_wild("arn:([^:]*):([^:]*):([^:]*):([^:]*):([^:]*)", + std::regex_constants::ECMAScript | + std::regex_constants::optimize); + static const regex rx_no_wild( + "arn:([^:*]*):([^:*]*):([^:*]*):([^:*]*):([^:*]*)", + std::regex_constants::ECMAScript | + std::regex_constants::optimize); + + smatch match; + + if ((s == "*") && wildcards) { + return ARN(Partition::wildcard, Service::wildcard, "*", "*", "*"); + } else if (regex_match(s, match, wildcards ? 
rx_wild : rx_no_wild)) { + ceph_assert(match.size() == 6); + + ARN a; + { + auto p = to_partition(match[1], wildcards); + if (!p) + return none; + + a.partition = *p; + } + { + auto s = to_service(match[2], wildcards); + if (!s) { + return none; + } + a.service = *s; + } + + a.region = match[3]; + a.account = match[4]; + a.resource = match[5]; + + return a; + } + return none; +} + +string ARN::to_string() const { + string s; + + if (partition == Partition::aws) { + s.append("aws:"); + } else if (partition == Partition::aws_cn) { + s.append("aws-cn:"); + } else if (partition == Partition::aws_us_gov) { + s.append("aws-us-gov:"); + } else { + s.append("*:"); + } + + static const unordered_map<Service, string> services = { + { Service::acm, "acm" }, + { Service::apigateway, "apigateway" }, + { Service::appstream, "appstream" }, + { Service::artifact, "artifact" }, + { Service::autoscaling, "autoscaling" }, + { Service::aws_marketplace, "aws-marketplace" }, + { Service::aws_marketplace_management, "aws-marketplace-management" }, + { Service::aws_portal, "aws-portal" }, + { Service::cloudformation, "cloudformation" }, + { Service::cloudfront, "cloudfront" }, + { Service::cloudhsm, "cloudhsm" }, + { Service::cloudsearch, "cloudsearch" }, + { Service::cloudtrail, "cloudtrail" }, + { Service::cloudwatch, "cloudwatch" }, + { Service::codebuild, "codebuild" }, + { Service::codecommit, "codecommit" }, + { Service::codedeploy, "codedeploy" }, + { Service::codepipeline, "codepipeline" }, + { Service::cognito_identity, "cognito-identity" }, + { Service::cognito_idp, "cognito-idp" }, + { Service::cognito_sync, "cognito-sync" }, + { Service::config, "config" }, + { Service::datapipeline, "datapipeline" }, + { Service::devicefarm, "devicefarm" }, + { Service::directconnect, "directconnect" }, + { Service::dms, "dms" }, + { Service::ds, "ds" }, + { Service::dynamodb, "dynamodb" }, + { Service::ec2, "ec2" }, + { Service::ecr, "ecr" }, + { Service::ecs, "ecs" }, + { 
Service::elasticache, "elasticache" }, + { Service::elasticbeanstalk, "elasticbeanstalk" }, + { Service::elasticfilesystem, "elasticfilesystem" }, + { Service::elasticloadbalancing, "elasticloadbalancing" }, + { Service::elasticmapreduce, "elasticmapreduce" }, + { Service::elastictranscoder, "elastictranscoder" }, + { Service::es, "es" }, + { Service::events, "events" }, + { Service::firehose, "firehose" }, + { Service::gamelift, "gamelift" }, + { Service::glacier, "glacier" }, + { Service::health, "health" }, + { Service::iam, "iam" }, + { Service::importexport, "importexport" }, + { Service::inspector, "inspector" }, + { Service::iot, "iot" }, + { Service::kinesis, "kinesis" }, + { Service::kinesisanalytics, "kinesisanalytics" }, + { Service::kms, "kms" }, + { Service::lambda, "lambda" }, + { Service::lightsail, "lightsail" }, + { Service::logs, "logs" }, + { Service::machinelearning, "machinelearning" }, + { Service::mobileanalytics, "mobileanalytics" }, + { Service::mobilehub, "mobilehub" }, + { Service::opsworks, "opsworks" }, + { Service::opsworks_cm, "opsworks-cm" }, + { Service::polly, "polly" }, + { Service::rds, "rds" }, + { Service::redshift, "redshift" }, + { Service::route53, "route53" }, + { Service::route53domains, "route53domains" }, + { Service::s3, "s3" }, + { Service::sdb, "sdb" }, + { Service::servicecatalog, "servicecatalog" }, + { Service::ses, "ses" }, + { Service::sns, "sns" }, + { Service::sqs, "sqs" }, + { Service::ssm, "ssm" }, + { Service::states, "states" }, + { Service::storagegateway, "storagegateway" }, + { Service::sts, "sts" }, + { Service::support, "support" }, + { Service::swf, "swf" }, + { Service::trustedadvisor, "trustedadvisor" }, + { Service::waf, "waf" }, + { Service::workmail, "workmail" }, + { Service::workspaces, "workspaces" }}; + + auto i = services.find(service); + if (i != services.end()) { + s.append(i->second); + } else { + s.push_back('*'); + } + s.push_back(':'); + + s.append(region); + s.push_back(':'); + + 
s.append(account); + s.push_back(':'); + + s.append(resource); + + return s; +} + +bool operator ==(const ARN& l, const ARN& r) { + return ((l.partition == r.partition) && + (l.service == r.service) && + (l.region == r.region) && + (l.account == r.account) && + (l.resource == r.resource)); +} +bool operator <(const ARN& l, const ARN& r) { + return ((l.partition < r.partition) || + (l.service < r.service) || + (l.region < r.region) || + (l.account < r.account) || + (l.resource < r.resource)); +} + +// The candidate is not allowed to have wildcards. The only way to +// do that sanely would be to use unification rather than matching. +bool ARN::match(const ARN& candidate) const { + if ((candidate.partition == Partition::wildcard) || + (partition != candidate.partition && partition + != Partition::wildcard)) { + return false; + } + + if ((candidate.service == Service::wildcard) || + (service != candidate.service && service != Service::wildcard)) { + return false; + } + + if (!::match(region, candidate.region, MATCH_POLICY_ARN)) { + return false; + } + + if (!::match(account, candidate.account, MATCH_POLICY_ARN)) { + return false; + } + + if (!::match(resource, candidate.resource, MATCH_POLICY_ARN)) { + return false; + } + + return true; +} + +static const actpair actpairs[] = +{{ "s3:AbortMultipartUpload", s3AbortMultipartUpload }, + { "s3:CreateBucket", s3CreateBucket }, + { "s3:DeleteBucketPolicy", s3DeleteBucketPolicy }, + { "s3:DeleteBucket", s3DeleteBucket }, + { "s3:DeleteBucketWebsite", s3DeleteBucketWebsite }, + { "s3:DeleteObject", s3DeleteObject }, + { "s3:DeleteObjectVersion", s3DeleteObjectVersion }, + { "s3:DeleteReplicationConfiguration", s3DeleteReplicationConfiguration }, + { "s3:GetAccelerateConfiguration", s3GetAccelerateConfiguration }, + { "s3:GetBucketAcl", s3GetBucketAcl }, + { "s3:GetBucketCORS", s3GetBucketCORS }, + { "s3:GetBucketLocation", s3GetBucketLocation }, + { "s3:GetBucketLogging", s3GetBucketLogging }, + { "s3:GetBucketNotification", 
s3GetBucketNotification }, + { "s3:GetBucketPolicy", s3GetBucketPolicy }, + { "s3:GetBucketRequestPayment", s3GetBucketRequestPayment }, + { "s3:GetBucketTagging", s3GetBucketTagging }, + { "s3:GetBucketVersioning", s3GetBucketVersioning }, + { "s3:GetBucketWebsite", s3GetBucketWebsite }, + { "s3:GetLifecycleConfiguration", s3GetLifecycleConfiguration }, + { "s3:GetObjectAcl", s3GetObjectAcl }, + { "s3:GetObject", s3GetObject }, + { "s3:GetObjectTorrent", s3GetObjectTorrent }, + { "s3:GetObjectVersionAcl", s3GetObjectVersionAcl }, + { "s3:GetObjectVersion", s3GetObjectVersion }, + { "s3:GetObjectVersionTorrent", s3GetObjectVersionTorrent }, + { "s3:GetReplicationConfiguration", s3GetReplicationConfiguration }, + { "s3:ListAllMyBuckets", s3ListAllMyBuckets }, + { "s3:ListBucketMultiPartUploads", s3ListBucketMultiPartUploads }, + { "s3:ListBucket", s3ListBucket }, + { "s3:ListBucketVersions", s3ListBucketVersions }, + { "s3:ListMultipartUploadParts", s3ListMultipartUploadParts }, + { "s3:PutAccelerateConfiguration", s3PutAccelerateConfiguration }, + { "s3:PutBucketAcl", s3PutBucketAcl }, + { "s3:PutBucketCORS", s3PutBucketCORS }, + { "s3:PutBucketLogging", s3PutBucketLogging }, + { "s3:PutBucketNotification", s3PutBucketNotification }, + { "s3:PutBucketPolicy", s3PutBucketPolicy }, + { "s3:PutBucketRequestPayment", s3PutBucketRequestPayment }, + { "s3:PutBucketTagging", s3PutBucketTagging }, + { "s3:PutBucketVersioning", s3PutBucketVersioning }, + { "s3:PutBucketWebsite", s3PutBucketWebsite }, + { "s3:PutLifecycleConfiguration", s3PutLifecycleConfiguration }, + { "s3:PutObjectAcl", s3PutObjectAcl }, + { "s3:PutObject", s3PutObject }, + { "s3:PutObjectVersionAcl", s3PutObjectVersionAcl }, + { "s3:PutReplicationConfiguration", s3PutReplicationConfiguration }, + { "s3:RestoreObject", s3RestoreObject }}; + +struct PolicyParser; + +const Keyword top[1]{"<Top>", TokenKind::pseudo, TokenID::Top, 0, false, + false}; +const Keyword cond_key[1]{"<Condition Key>", 
TokenKind::cond_key, + TokenID::CondKey, 0, true, false}; + +struct ParseState { + PolicyParser* pp; + const Keyword* w; + + bool arraying = false; + bool objecting = false; + + void reset(); + + ParseState(PolicyParser* pp, const Keyword* w) + : pp(pp), w(w) {} + + bool obj_start(); + + bool obj_end(); + + bool array_start() { + if (w->arrayable && !arraying) { + arraying = true; + return true; + } + return false; + } + + bool array_end(); + + bool key(const char* s, size_t l); + bool do_string(CephContext* cct, const char* s, size_t l); + bool number(const char* str, size_t l); +}; + +// If this confuses you, look up the Curiously Recurring Template Pattern +struct PolicyParser : public BaseReaderHandler<UTF8<>, PolicyParser> { + keyword_hash tokens; + std::vector<ParseState> s; + CephContext* cct; + const string& tenant; + Policy& policy; + + uint32_t seen = 0; + + uint32_t dex(TokenID in) const { + switch (in) { + case TokenID::Version: + return 0x1; + case TokenID::Id: + return 0x2; + case TokenID::Statement: + return 0x4; + case TokenID::Sid: + return 0x8; + case TokenID::Effect: + return 0x10; + case TokenID::Principal: + return 0x20; + case TokenID::NotPrincipal: + return 0x40; + case TokenID::Action: + return 0x80; + case TokenID::NotAction: + return 0x100; + case TokenID::Resource: + return 0x200; + case TokenID::NotResource: + return 0x400; + case TokenID::Condition: + return 0x800; + case TokenID::AWS: + return 0x1000; + case TokenID::Federated: + return 0x2000; + case TokenID::Service: + return 0x4000; + case TokenID::CanonicalUser: + return 0x8000; + default: + ceph_abort(); + } + } + bool test(TokenID in) { + return seen & dex(in); + } + void set(TokenID in) { + seen |= dex(in); + } + void set(std::initializer_list<TokenID> l) { + for (auto in : l) { + seen |= dex(in); + } + } + void reset(TokenID in) { + seen &= ~dex(in); + } + void reset(std::initializer_list<TokenID> l) { + for (auto in : l) { + seen &= ~dex(in); + } + } + + 
PolicyParser(CephContext* cct, const string& tenant, Policy& policy) + : cct(cct), tenant(tenant), policy(policy) {} + PolicyParser(const PolicyParser& policy) = delete; + + bool StartObject() { + if (s.empty()) { + s.push_back({this, top}); + s.back().objecting = true; + return true; + } + + return s.back().obj_start(); + } + bool EndObject(SizeType memberCount) { + if (s.empty()) { + return false; + } + + return s.back().obj_end(); + } + bool Key(const char* str, SizeType length, bool copy) { + if (s.empty()) { + return false; + } + + return s.back().key(str, length); + } + + bool String(const char* str, SizeType length, bool copy) { + if (s.empty()) { + return false; + } + + return s.back().do_string(cct, str, length); + } + bool RawNumber(const char* str, SizeType length, bool copy) { + if (s.empty()) { + return false; + } + + return s.back().number(str, length); + } + bool StartArray() { + if (s.empty()) { + return false; + } + + return s.back().array_start(); + } + bool EndArray(SizeType) { + if (s.empty()) { + return false; + } + + return s.back().array_end(); + } + + bool Default() { + return false; + } +}; + + +// I really despise this misfeature of C++. 
+// +bool ParseState::obj_end() { + if (objecting) { + objecting = false; + if (!arraying) { + pp->s.pop_back(); + } else { + reset(); + } + return true; + } + return false; +} + +bool ParseState::key(const char* s, size_t l) { + auto k = pp->tokens.lookup(s, l); + + if (!k) { + if (w->kind == TokenKind::cond_op) { + auto& t = pp->policy.statements.back(); + pp->s.emplace_back(pp, cond_key); + t.conditions.emplace_back(w->id, s, l); + return true; + } else { + return false; + } + } + + // If the token we're going with belongs within the condition at the + // top of the stack and we haven't already encountered it, push it + // on the stack + + // Top + if ((((w->id == TokenID::Top) && (k->kind == TokenKind::top)) || + // Statement + ((w->id == TokenID::Statement) && (k->kind == TokenKind::statement)) || + + /// Principal + ((w->id == TokenID::Principal || w->id == TokenID::NotPrincipal) && + (k->kind == TokenKind::princ_type))) && + + // Check that it hasn't been encountered. Note that this + // conjoins with the run of disjunctions above. + !pp->test(k->id)) { + pp->set(k->id); + pp->s.emplace_back(pp, k); + return true; + } else if ((w->id == TokenID::Condition) && + (k->kind == TokenKind::cond_op)) { + pp->s.emplace_back(pp, k); + return true; + } + return false; +} + +// I should just rewrite a few helper functions to use iterators, +// which will make all of this ever so much nicer. +static optional<Principal> parse_principal(CephContext* cct, TokenID t, + string&& s) { + // Wildcard! + if ((t == TokenID::AWS) && (s == "*")) { + return Principal::wildcard(); + + // Do nothing for now. + } else if (t == TokenID::CanonicalUser) { + + // AWS ARNs + } else if (t == TokenID::AWS) { + auto a = ARN::parse(s); + if (!a) { + if (std::none_of(s.begin(), s.end(), + [](const char& c) { + return (c == ':') || (c == '/'); + })) { + // Since tenants are simply prefixes, there's no really good + // way to see if one exists or not. 
So we return the thing and + // let them try to match against it. + return Principal::tenant(std::move(s)); + } + } + + if (a->resource == "root") { + return Principal::tenant(std::move(a->account)); + } + + static const regex rx("([^/]*)/(.*)", + std::regex_constants::ECMAScript | + std::regex_constants::optimize); + smatch match; + if (regex_match(a->resource, match, rx)) { + ceph_assert(match.size() == 2); + + if (match[1] == "user") { + return Principal::user(std::move(a->account), + match[2]); + } + + if (match[1] == "role") { + return Principal::role(std::move(a->account), + match[2]); + } + } + } + + ldout(cct, 0) << "Supplied principal is discarded: " << s << dendl; + return boost::none; +} + +bool ParseState::do_string(CephContext* cct, const char* s, size_t l) { + auto k = pp->tokens.lookup(s, l); + Policy& p = pp->policy; + Statement* t = p.statements.empty() ? nullptr : &(p.statements.back()); + + // Top level! + if ((w->id == TokenID::Version) && k && + k->kind == TokenKind::version_key) { + p.version = static_cast<Version>(k->specific); + } else if (w->id == TokenID::Id) { + p.id = string(s, l); + + // Statement + + } else if (w->id == TokenID::Sid) { + t->sid.emplace(s, l); + } else if ((w->id == TokenID::Effect) && + k->kind == TokenKind::effect_key) { + t->effect = static_cast<Effect>(k->specific); + } else if (w->id == TokenID::Principal && s && *s == '*') { + t->princ.emplace(Principal::wildcard()); + } else if (w->id == TokenID::NotPrincipal && s && *s == '*') { + t->noprinc.emplace(Principal::wildcard()); + } else if ((w->id == TokenID::Action) || + (w->id == TokenID::NotAction)) { + for (auto& p : actpairs) { + if (match({s, l}, p.name, MATCH_POLICY_ACTION)) { + (w->id == TokenID::Action ? t->action : t->notaction) |= p.bit; + } + } + } else if (w->id == TokenID::Resource || w->id == TokenID::NotResource) { + auto a = ARN::parse({s, l}, true); + // You can't specify resources for someone ELSE'S account. 
+ if (a && (a->account.empty() || a->account == pp->tenant || + a->account == "*")) { + if (a->account.empty() || a->account == "*") + a->account = pp->tenant; + (w->id == TokenID::Resource ? t->resource : t->notresource) + .emplace(std::move(*a)); + } + else + ldout(cct, 0) << "Supplied resource is discarded: " << string(s, l) + << dendl; + } else if (w->kind == TokenKind::cond_key) { + auto& t = pp->policy.statements.back(); + t.conditions.back().vals.emplace_back(s, l); + + // Principals + + } else if (w->kind == TokenKind::princ_type) { + ceph_assert(pp->s.size() > 1); + auto& pri = pp->s[pp->s.size() - 2].w->id == TokenID::Principal ? + t->princ : t->noprinc; + + auto o = parse_principal(pp->cct, w->id, string(s, l)); + if (o) + pri.emplace(std::move(*o)); + + // Failure + + } else { + return false; + } + + if (!arraying) { + pp->s.pop_back(); + } + + return true; +} + +bool ParseState::number(const char* s, size_t l) { + // Top level! + if (w->kind == TokenKind::cond_key) { + auto& t = pp->policy.statements.back(); + t.conditions.back().vals.emplace_back(s, l); + + // Failure + + } else { + return false; + } + + if (!arraying) { + pp->s.pop_back(); + } + + return true; +} + +void ParseState::reset() { + pp->reset({TokenID::Sid, TokenID::Effect, TokenID::Principal, + TokenID::NotPrincipal, TokenID::Action, TokenID::NotAction, + TokenID::Resource, TokenID::NotResource, TokenID::Condition}); +} + +bool ParseState::obj_start() { + if (w->objectable && !objecting) { + objecting = true; + if (w->id == TokenID::Statement) { + pp->policy.statements.push_back({}); + } + + return true; + } + + return false; +} + + +bool ParseState::array_end() { + if (arraying && !objecting) { + pp->s.pop_back(); + return true; + } + + return false; +} + +ostream& operator <<(ostream& m, const MaskedIP& ip) { + // I have a theory about why std::bitset is the way it is. 
+ if (ip.v6) { + for (int i = 15; i >= 0; --i) { + uint8_t b = 0; + for (int j = 7; j >= 0; --j) { + b |= (ip.addr[(i * 8) + j] << j); + } + m << hex << b; + if (i != 0) { + m << "::"; + } + } + } else { + // It involves Satan. + for (int i = 3; i >= 0; --i) { + uint8_t b = 0; + for (int j = 7; j >= 0; --j) { + b |= (ip.addr[(i * 8) + j] << j); + } + m << b; + if (i != 0) { + m << "."; + } + } + } + m << "/" << ip.prefix; + // It would explain a lot + return m; +} + +string to_string(const MaskedIP& m) { + stringstream ss; + ss << m; + return ss.str(); +} + +bool Condition::eval(const Environment& env) const { + auto i = env.find(key); + if (op == TokenID::Null) { + return i == env.end() ? true : false; + } + + if (i == env.end()) { + return false; + } + const auto& s = i->second; + + switch (op) { + // String! + case TokenID::StringEquals: + return orrible(std::equal_to<std::string>(), s, vals); + + case TokenID::StringNotEquals: + return orrible(std::not2(std::equal_to<std::string>()), + s, vals); + + case TokenID::StringEqualsIgnoreCase: + return orrible(ci_equal_to(), s, vals); + + case TokenID::StringNotEqualsIgnoreCase: + return orrible(std::not2(ci_equal_to()), s, vals); + + // Implement actual StringLike with wildcarding later + case TokenID::StringLike: + return orrible(std::equal_to<std::string>(), s, vals); + case TokenID::StringNotLike: + return orrible(std::not2(std::equal_to<std::string>()), + s, vals); + + // Numeric + case TokenID::NumericEquals: + return shortible(std::equal_to<double>(), as_number, s, vals); + + case TokenID::NumericNotEquals: + return shortible(std::not2(std::equal_to<double>()), + as_number, s, vals); + + + case TokenID::NumericLessThan: + return shortible(std::less<double>(), as_number, s, vals); + + + case TokenID::NumericLessThanEquals: + return shortible(std::less_equal<double>(), as_number, s, vals); + + case TokenID::NumericGreaterThan: + return shortible(std::greater<double>(), as_number, s, vals); + + case 
TokenID::NumericGreaterThanEquals: + return shortible(std::greater_equal<double>(), as_number, s, vals); + + // Date! + case TokenID::DateEquals: + return shortible(std::equal_to<ceph::real_time>(), as_date, s, vals); + + case TokenID::DateNotEquals: + return shortible(std::not2(std::equal_to<ceph::real_time>()), + as_date, s, vals); + + case TokenID::DateLessThan: + return shortible(std::less<ceph::real_time>(), as_date, s, vals); + + + case TokenID::DateLessThanEquals: + return shortible(std::less_equal<ceph::real_time>(), as_date, s, vals); + + case TokenID::DateGreaterThan: + return shortible(std::greater<ceph::real_time>(), as_date, s, vals); + + case TokenID::DateGreaterThanEquals: + return shortible(std::greater_equal<ceph::real_time>(), as_date, s, + vals); + + // Bool! + case TokenID::Bool: + return shortible(std::equal_to<bool>(), as_bool, s, vals); + + // Binary! + case TokenID::BinaryEquals: + return shortible(std::equal_to<ceph::bufferlist>(), as_binary, s, + vals); + + // IP Address! + case TokenID::IpAddress: + return shortible(std::equal_to<MaskedIP>(), as_network, s, vals); + + case TokenID::NotIpAddress: + return shortible(std::not2(std::equal_to<MaskedIP>()), as_network, s, + vals); + +#if 0 + // Amazon Resource Names! (Does S3 need this?) + TokenID::ArnEquals, TokenID::ArnNotEquals, TokenID::ArnLike, + TokenID::ArnNotLike, +#endif + + default: + return false; + } +} + +optional<MaskedIP> Condition::as_network(const string& s) { + MaskedIP m; + if (s.empty()) { + return none; + } + + m.v6 = s.find(':'); + auto slash = s.find('/'); + if (slash == string::npos) { + m.prefix = m.v6 ? 
128 : 32; + } else { + char* end = 0; + m.prefix = strtoul(s.data() + slash + 1, &end, 10); + if (*end != 0 || (m.v6 && m.prefix > 128) || + (!m.v6 && m.prefix > 32)) { + return none; + } + } + + string t; + auto p = &s; + + if (slash != string::npos) { + t.assign(s, 0, slash); + p = &t; + } + + if (m.v6) { + struct sockaddr_in6 a; + if (inet_pton(AF_INET6, p->c_str(), static_cast<void*>(&a)) != 1) { + return none; + } + + m.addr |= Address(a.sin6_addr.s6_addr[0]) << 0; + m.addr |= Address(a.sin6_addr.s6_addr[1]) << 8; + m.addr |= Address(a.sin6_addr.s6_addr[2]) << 16; + m.addr |= Address(a.sin6_addr.s6_addr[3]) << 24; + m.addr |= Address(a.sin6_addr.s6_addr[4]) << 32; + m.addr |= Address(a.sin6_addr.s6_addr[5]) << 40; + m.addr |= Address(a.sin6_addr.s6_addr[6]) << 48; + m.addr |= Address(a.sin6_addr.s6_addr[7]) << 56; + m.addr |= Address(a.sin6_addr.s6_addr[8]) << 64; + m.addr |= Address(a.sin6_addr.s6_addr[9]) << 72; + m.addr |= Address(a.sin6_addr.s6_addr[10]) << 80; + m.addr |= Address(a.sin6_addr.s6_addr[11]) << 88; + m.addr |= Address(a.sin6_addr.s6_addr[12]) << 96; + m.addr |= Address(a.sin6_addr.s6_addr[13]) << 104; + m.addr |= Address(a.sin6_addr.s6_addr[14]) << 112; + m.addr |= Address(a.sin6_addr.s6_addr[15]) << 120; + } else { + struct sockaddr_in a; + if (inet_pton(AF_INET, p->c_str(), static_cast<void*>(&a)) != 1) { + return none; + } + m.addr = ntohl(a.sin_addr.s_addr); + } + + return none; +} + +namespace { +const char* condop_string(const TokenID t) { + switch (t) { + case TokenID::StringEquals: + return "StringEquals"; + + case TokenID::StringNotEquals: + return "StringNotEquals"; + + case TokenID::StringEqualsIgnoreCase: + return "StringEqualsIgnoreCase"; + + case TokenID::StringNotEqualsIgnoreCase: + return "StringNotEqualsIgnoreCase"; + + case TokenID::StringLike: + return "StringLike"; + + case TokenID::StringNotLike: + return "StringNotLike"; + + // Numeric! 
+ case TokenID::NumericEquals: + return "NumericEquals"; + + case TokenID::NumericNotEquals: + return "NumericNotEquals"; + + case TokenID::NumericLessThan: + return "NumericLessThan"; + + case TokenID::NumericLessThanEquals: + return "NumericLessThanEquals"; + + case TokenID::NumericGreaterThan: + return "NumericGreaterThan"; + + case TokenID::NumericGreaterThanEquals: + return "NumericGreaterThanEquals"; + + case TokenID::DateEquals: + return "DateEquals"; + + case TokenID::DateNotEquals: + return "DateNotEquals"; + + case TokenID::DateLessThan: + return "DateLessThan"; + + case TokenID::DateLessThanEquals: + return "DateLessThanEquals"; + + case TokenID::DateGreaterThan: + return "DateGreaterThan"; + + case TokenID::DateGreaterThanEquals: + return "DateGreaterThanEquals"; + + case TokenID::Bool: + return "Bool"; + + case TokenID::BinaryEquals: + return "BinaryEquals"; + + case TokenID::IpAddress: + return "case TokenID::IpAddress"; + + case TokenID::NotIpAddress: + return "NotIpAddress"; + + case TokenID::ArnEquals: + return "ArnEquals"; + + case TokenID::ArnNotEquals: + return "ArnNotEquals"; + + case TokenID::ArnLike: + return "ArnLike"; + + case TokenID::ArnNotLike: + return "ArnNotLike"; + + case TokenID::Null: + return "Null"; + + default: + return "InvalidConditionOperator"; + } +} + +template<typename Iterator> +ostream& print_array(ostream& m, Iterator begin, Iterator end) { + if (begin == end) { + m << "["; + } else { + auto beforelast = end - 1; + m << "[ "; + for (auto i = begin; i != end; ++i) { + m << *i; + if (i != beforelast) { + m << ", "; + } else { + m << " "; + } + } + } + m << "]"; + return m; +} +} + +ostream& operator <<(ostream& m, const Condition& c) { + m << "{ " << condop_string(c.op) << ": { " << c.key; + if (c.ifexists) { + m << "IfExists"; + } + print_array(m, c.vals.cbegin(), c.vals.cend()); + return m << "}"; +} + +string to_string(const Condition& c) { + stringstream ss; + ss << c; + return ss.str(); +} + +Effect 
Statement::eval(const Environment& e, + optional<const rgw::auth::Identity&> ida, + uint64_t act, const ARN& res) const { + if (ida && (!ida->is_identity(princ) || ida->is_identity(noprinc))) { + return Effect::Pass; + } + + + if (!std::any_of(resource.begin(), resource.end(), + [&res](const ARN& pattern) { + return pattern.match(res); + }) || + (std::any_of(notresource.begin(), notresource.end(), + [&res](const ARN& pattern) { + return pattern.match(res); + }))) { + return Effect::Pass; + } + + if (!(action & act) || (notaction & act)) { + return Effect::Pass; + } + + if (std::all_of(conditions.begin(), + conditions.end(), + [&e](const Condition& c) { return c.eval(e);})) { + return effect; + } + + return Effect::Pass; +} + +namespace { +const char* action_bit_string(uint64_t action) { + switch (action) { + case s3GetObject: + return "s3:GetObject"; + + case s3GetObjectVersion: + return "s3:GetObjectVersion"; + + case s3PutObject: + return "s3:PutObject"; + + case s3GetObjectAcl: + return "s3:GetObjectAcl"; + + case s3GetObjectVersionAcl: + return "s3:GetObjectVersionAcl"; + + case s3PutObjectAcl: + return "s3:PutObjectAcl"; + + case s3PutObjectVersionAcl: + return "s3:PutObjectVersionAcl"; + + case s3DeleteObject: + return "s3:DeleteObject"; + + case s3DeleteObjectVersion: + return "s3:DeleteObjectVersion"; + + case s3ListMultipartUploadParts: + return "s3:ListMultipartUploadParts"; + + case s3AbortMultipartUpload: + return "s3:AbortMultipartUpload"; + + case s3GetObjectTorrent: + return "s3:GetObjectTorrent"; + + case s3GetObjectVersionTorrent: + return "s3:GetObjectVersionTorrent"; + + case s3RestoreObject: + return "s3:RestoreObject"; + + case s3CreateBucket: + return "s3:CreateBucket"; + + case s3DeleteBucket: + return "s3:DeleteBucket"; + + case s3ListBucket: + return "s3:ListBucket"; + + case s3ListBucketVersions: + return "s3:ListBucketVersions"; + case s3ListAllMyBuckets: + return "s3:ListAllMyBuckets"; + + case s3ListBucketMultiPartUploads: + return 
"s3:ListBucketMultiPartUploads"; + + case s3GetAccelerateConfiguration: + return "s3:GetAccelerateConfiguration"; + + case s3PutAccelerateConfiguration: + return "s3:PutAccelerateConfiguration"; + + case s3GetBucketAcl: + return "s3:GetBucketAcl"; + + case s3PutBucketAcl: + return "s3:PutBucketAcl"; + + case s3GetBucketCORS: + return "s3:GetBucketCORS"; + + case s3PutBucketCORS: + return "s3:PutBucketCORS"; + + case s3GetBucketVersioning: + return "s3:GetBucketVersioning"; + + case s3PutBucketVersioning: + return "s3:PutBucketVersioning"; + + case s3GetBucketRequestPayment: + return "s3:GetBucketRequestPayment"; + + case s3PutBucketRequestPayment: + return "s3:PutBucketRequestPayment"; + + case s3GetBucketLocation: + return "s3:GetBucketLocation"; + + case s3GetBucketPolicy: + return "s3:GetBucketPolicy"; + + case s3DeleteBucketPolicy: + return "s3:DeleteBucketPolicy"; + + case s3PutBucketPolicy: + return "s3:PutBucketPolicy"; + + case s3GetBucketNotification: + return "s3:GetBucketNotification"; + + case s3PutBucketNotification: + return "s3:PutBucketNotification"; + + case s3GetBucketLogging: + return "s3:GetBucketLogging"; + + case s3PutBucketLogging: + return "s3:PutBucketLogging"; + + case s3GetBucketTagging: + return "s3:GetBucketTagging"; + + case s3PutBucketTagging: + return "s3:PutBucketTagging"; + + case s3GetBucketWebsite: + return "s3:GetBucketWebsite"; + + case s3PutBucketWebsite: + return "s3:PutBucketWebsite"; + + case s3DeleteBucketWebsite: + return "s3:DeleteBucketWebsite"; + + case s3GetLifecycleConfiguration: + return "s3:GetLifecycleConfiguration"; + + case s3PutLifecycleConfiguration: + return "s3:PutLifecycleConfiguration"; + + case s3PutReplicationConfiguration: + return "s3:PutReplicationConfiguration"; + + case s3GetReplicationConfiguration: + return "s3:GetReplicationConfiguration"; + + case s3DeleteReplicationConfiguration: + return "s3:DeleteReplicationConfiguration"; + } + return "s3Invalid"; +} + +ostream& print_actions(ostream& m, 
const uint64_t a) { + bool begun = false; + m << "[ "; + for (auto i = 0U; i < s3Count; ++i) { + if (a & (1 << i)) { + if (begun) { + m << ", "; + } else { + begun = true; + } + m << action_bit_string(1 << i); + } + } + if (begun) { + m << " ]"; + } else { + m << "]"; + } + return m; +} +} + +ostream& operator <<(ostream& m, const Statement& s) { + m << "{ "; + if (s.sid) { + m << "Sid: " << *s.sid << ", "; + } + if (!s.princ.empty()) { + m << "Principal: "; + print_array(m, s.princ.cbegin(), s.princ.cend()); + m << ", "; + } + if (!s.noprinc.empty()) { + m << "NotPrincipal: "; + print_array(m, s.noprinc.cbegin(), s.noprinc.cend()); + m << ", "; + } + + m << "Effect: " << + (s.effect == Effect::Allow ? + (const char*) "Allow" : + (const char*) "Deny"); + + if (s.action || s.notaction || !s.resource.empty() || + !s.notresource.empty() || !s.conditions.empty()) { + m << ", "; + } + + if (s.action) { + m << "Action: "; + print_actions(m, s.action); + + if (s.notaction || !s.resource.empty() || + !s.notresource.empty() || !s.conditions.empty()) { + m << ", "; + } + } + + if (s.notaction) { + m << "NotAction: "; + print_actions(m, s.notaction); + + if (!s.resource.empty() || !s.notresource.empty() || + !s.conditions.empty()) { + m << ", "; + } + } + + if (!s.resource.empty()) { + m << "Resource: "; + print_array(m, s.resource.cbegin(), s.resource.cend()); + + if (!s.notresource.empty() || !s.conditions.empty()) { + m << ", "; + } + } + + if (!s.notresource.empty()) { + m << "NotResource: "; + print_array(m, s.notresource.cbegin(), s.notresource.cend()); + + if (!s.conditions.empty()) { + m << ", "; + } + } + + if (!s.conditions.empty()) { + m << "Condition: "; + print_array(m, s.conditions.cbegin(), s.conditions.cend()); + } + + return m << " }"; +} + +string to_string(const Statement& s) { + stringstream m; + m << s; + return m.str(); +} + +Policy::Policy(CephContext* cct, const string& tenant, + const bufferlist& _text) + : text(_text.to_str()) { + StringStream 
ss(text.data()); + PolicyParser pp(cct, tenant, *this); + auto pr = Reader{}.Parse<kParseNumbersAsStringsFlag | + kParseCommentsFlag>(ss, pp); + if (!pr) { + throw PolicyParseException(std::move(pr)); + } +} + +Effect Policy::eval(const Environment& e, + optional<const rgw::auth::Identity&> ida, + std::uint64_t action, const ARN& resource) const { + auto allowed = false; + for (auto& s : statements) { + auto g = s.eval(e, ida, action, resource); + if (g == Effect::Deny) { + return g; + } else if (g == Effect::Allow) { + allowed = true; + } + } + return allowed ? Effect::Allow : Effect::Pass; +} + +ostream& operator <<(ostream& m, const Policy& p) { + m << "{ Version: " + << (p.version == Version::v2008_10_17 ? "2008-10-17" : "2012-10-17"); + + if (p.id || !p.statements.empty()) { + m << ", "; + } + + if (p.id) { + m << "Id: " << *p.id; + if (!p.statements.empty()) { + m << ", "; + } + } + + if (!p.statements.empty()) { + m << "Statements: "; + print_array(m, p.statements.cbegin(), p.statements.cend()); + m << ", "; + } + return m << " }"; +} + +string to_string(const Policy& p) { + stringstream s; + s << p; + return s.str(); +} + +} +} diff --git a/src/rgw/rgw_iam_policy.h b/src/rgw/rgw_iam_policy.h new file mode 100644 index 00000000000..4429a574309 --- /dev/null +++ b/src/rgw/rgw_iam_policy.h @@ -0,0 +1,466 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RGW_IAM_POLICY_H +#define CEPH_RGW_IAM_POLICY_H + +#include <bitset> +#include <chrono> +#include <cstdint> +#include <iostream> +#include <string> + +#include <boost/algorithm/string/predicate.hpp> +#include <boost/container/flat_map.hpp> +#include <boost/container/flat_set.hpp> +#include <boost/optional.hpp> +#include <boost/thread/shared_mutex.hpp> +#include <boost/utility/string_ref.hpp> +#include <boost/variant.hpp> + +#include "common/ceph_time.h" +#include "common/iso_8601.h" + +#include "rapidjson/error/error.h" +#include 
"rapidjson/error/en.h" + +#include "fnmatch.h" + +#include "rgw_basic_types.h" +#include "rgw_iam_policy_keywords.h" + +#include "include/assert.h" // razzin' frazzin' ...grrr. + +class RGWRados; +namespace rgw { +namespace auth { +class Identity; +} +} +struct rgw_obj; +struct rgw_bucket; + +namespace rgw { +namespace IAM { +static constexpr std::uint64_t s3None = 0; +static constexpr std::uint64_t s3GetObject = 1ULL << 0; +static constexpr std::uint64_t s3GetObjectVersion = 1ULL << 1; +static constexpr std::uint64_t s3PutObject = 1ULL << 2; +static constexpr std::uint64_t s3GetObjectAcl = 1ULL << 3; +static constexpr std::uint64_t s3GetObjectVersionAcl = 1ULL << 4; +static constexpr std::uint64_t s3PutObjectAcl = 1ULL << 5; +static constexpr std::uint64_t s3PutObjectVersionAcl = 1ULL << 6; +static constexpr std::uint64_t s3DeleteObject = 1ULL << 7; +static constexpr std::uint64_t s3DeleteObjectVersion = 1ULL << 8; +static constexpr std::uint64_t s3ListMultipartUploadParts = 1ULL << 9; +static constexpr std::uint64_t s3AbortMultipartUpload = 1ULL << 10; +static constexpr std::uint64_t s3GetObjectTorrent = 1ULL << 11; +static constexpr std::uint64_t s3GetObjectVersionTorrent = 1ULL << 12; +static constexpr std::uint64_t s3RestoreObject = 1ULL << 13; +static constexpr std::uint64_t s3CreateBucket = 1ULL << 14; +static constexpr std::uint64_t s3DeleteBucket = 1ULL << 15; +static constexpr std::uint64_t s3ListBucket = 1ULL << 16; +static constexpr std::uint64_t s3ListBucketVersions = 1ULL << 17; +static constexpr std::uint64_t s3ListAllMyBuckets = 1ULL << 18; +static constexpr std::uint64_t s3ListBucketMultiPartUploads = 1ULL << 19; +static constexpr std::uint64_t s3GetAccelerateConfiguration = 1ULL << 20; +static constexpr std::uint64_t s3PutAccelerateConfiguration = 1ULL << 21; +static constexpr std::uint64_t s3GetBucketAcl = 1ULL << 22; +static constexpr std::uint64_t s3PutBucketAcl = 1ULL << 23; +static constexpr std::uint64_t s3GetBucketCORS = 1ULL << 24; +static 
// One bit per S3 action, so that a set of actions fits in a single 64-bit
// mask. s3Count is the number of actions defined and s3All has the bit of
// every one of them set.
static constexpr std::uint64_t s3None = 0;
static constexpr std::uint64_t s3GetObject = std::uint64_t{1} << 0;
static constexpr std::uint64_t s3GetObjectVersion = std::uint64_t{1} << 1;
static constexpr std::uint64_t s3PutObject = std::uint64_t{1} << 2;
static constexpr std::uint64_t s3GetObjectAcl = std::uint64_t{1} << 3;
static constexpr std::uint64_t s3GetObjectVersionAcl = std::uint64_t{1} << 4;
static constexpr std::uint64_t s3PutObjectAcl = std::uint64_t{1} << 5;
static constexpr std::uint64_t s3PutObjectVersionAcl = std::uint64_t{1} << 6;
static constexpr std::uint64_t s3DeleteObject = std::uint64_t{1} << 7;
static constexpr std::uint64_t s3DeleteObjectVersion = std::uint64_t{1} << 8;
static constexpr std::uint64_t s3ListMultipartUploadParts =
  std::uint64_t{1} << 9;
static constexpr std::uint64_t s3AbortMultipartUpload = std::uint64_t{1} << 10;
static constexpr std::uint64_t s3GetObjectTorrent = std::uint64_t{1} << 11;
static constexpr std::uint64_t s3GetObjectVersionTorrent =
  std::uint64_t{1} << 12;
static constexpr std::uint64_t s3RestoreObject = std::uint64_t{1} << 13;
static constexpr std::uint64_t s3CreateBucket = std::uint64_t{1} << 14;
static constexpr std::uint64_t s3DeleteBucket = std::uint64_t{1} << 15;
static constexpr std::uint64_t s3ListBucket = std::uint64_t{1} << 16;
static constexpr std::uint64_t s3ListBucketVersions = std::uint64_t{1} << 17;
static constexpr std::uint64_t s3ListAllMyBuckets = std::uint64_t{1} << 18;
static constexpr std::uint64_t s3ListBucketMultiPartUploads =
  std::uint64_t{1} << 19;
static constexpr std::uint64_t s3GetAccelerateConfiguration =
  std::uint64_t{1} << 20;
static constexpr std::uint64_t s3PutAccelerateConfiguration =
  std::uint64_t{1} << 21;
static constexpr std::uint64_t s3GetBucketAcl = std::uint64_t{1} << 22;
static constexpr std::uint64_t s3PutBucketAcl = std::uint64_t{1} << 23;
static constexpr std::uint64_t s3GetBucketCORS = std::uint64_t{1} << 24;
static constexpr std::uint64_t s3PutBucketCORS = std::uint64_t{1} << 25;
static constexpr std::uint64_t s3GetBucketVersioning = std::uint64_t{1} << 26;
static constexpr std::uint64_t s3PutBucketVersioning = std::uint64_t{1} << 27;
static constexpr std::uint64_t s3GetBucketRequestPayment =
  std::uint64_t{1} << 28;
static constexpr std::uint64_t s3PutBucketRequestPayment =
  std::uint64_t{1} << 29;
static constexpr std::uint64_t s3GetBucketLocation = std::uint64_t{1} << 30;
static constexpr std::uint64_t s3GetBucketPolicy = std::uint64_t{1} << 31;
static constexpr std::uint64_t s3DeleteBucketPolicy = std::uint64_t{1} << 32;
static constexpr std::uint64_t s3PutBucketPolicy = std::uint64_t{1} << 33;
static constexpr std::uint64_t s3GetBucketNotification =
  std::uint64_t{1} << 34;
static constexpr std::uint64_t s3PutBucketNotification =
  std::uint64_t{1} << 35;
static constexpr std::uint64_t s3GetBucketLogging = std::uint64_t{1} << 36;
static constexpr std::uint64_t s3PutBucketLogging = std::uint64_t{1} << 37;
static constexpr std::uint64_t s3GetBucketTagging = std::uint64_t{1} << 38;
static constexpr std::uint64_t s3PutBucketTagging = std::uint64_t{1} << 39;
static constexpr std::uint64_t s3GetBucketWebsite = std::uint64_t{1} << 40;
static constexpr std::uint64_t s3PutBucketWebsite = std::uint64_t{1} << 41;
static constexpr std::uint64_t s3DeleteBucketWebsite = std::uint64_t{1} << 42;
static constexpr std::uint64_t s3GetLifecycleConfiguration =
  std::uint64_t{1} << 43;
static constexpr std::uint64_t s3PutLifecycleConfiguration =
  std::uint64_t{1} << 44;
static constexpr std::uint64_t s3PutReplicationConfiguration =
  std::uint64_t{1} << 45;
static constexpr std::uint64_t s3GetReplicationConfiguration =
  std::uint64_t{1} << 46;
static constexpr std::uint64_t s3DeleteReplicationConfiguration =
  std::uint64_t{1} << 47;
static constexpr std::uint64_t s3Count = 48;
static constexpr std::uint64_t s3All = (std::uint64_t{1} << s3Count) - 1;
s3ListMultipartUploadParts: + return RGW_PERM_READ; + + case s3AbortMultipartUpload: + case s3CreateBucket: + case s3DeleteBucket: + case s3DeleteObject: + case s3DeleteObjectVersion: + case s3PutObject: + case s3RestoreObject: + return RGW_PERM_WRITE; + + case s3GetAccelerateConfiguration: + case s3GetBucketAcl: + case s3GetBucketCORS: + case s3GetBucketLocation: + case s3GetBucketLogging: + case s3GetBucketNotification: + case s3GetBucketPolicy: + case s3GetBucketRequestPayment: + case s3GetBucketTagging: + case s3GetBucketVersioning: + case s3GetBucketWebsite: + case s3GetLifecycleConfiguration: + case s3GetObjectAcl: + case s3GetObjectVersionAcl: + case s3GetReplicationConfiguration: + return RGW_PERM_READ_ACP; + + case s3DeleteBucketPolicy: + case s3DeleteBucketWebsite: + case s3DeleteReplicationConfiguration: + case s3PutAccelerateConfiguration: + case s3PutBucketAcl: + case s3PutBucketCORS: + case s3PutBucketLogging: + case s3PutBucketNotification: + case s3PutBucketPolicy: + case s3PutBucketRequestPayment: + case s3PutBucketTagging: + case s3PutBucketVersioning: + case s3PutBucketWebsite: + case s3PutLifecycleConfiguration: + case s3PutObjectAcl: + case s3PutObjectVersionAcl: + case s3PutReplicationConfiguration: + return RGW_PERM_WRITE_ACP; + + case s3All: + return RGW_PERM_FULL_CONTROL; + } + return RGW_PERM_INVALID; +} +} + +using Environment = boost::container::flat_map<std::string, std::string>; + +enum struct Partition { + aws, aws_cn, aws_us_gov, wildcard + // If we wanted our own ARNs for principal type unique to us + // (maybe to integrate better with Swift) or for anything else we + // provide that doesn't map onto S3, we could add an 'rgw' + // partition type. 
// Partition field of an ARN; `wildcard` stands for "*".
enum struct Partition {
  aws,
  aws_cn,
  aws_us_gov,
  wildcard
  // If we wanted our own ARNs for principal type unique to us
  // (maybe to integrate better with Swift) or for anything else we
  // provide that doesn't map onto S3, we could add an 'rgw'
  // partition type.
};

// Service field of an ARN; `wildcard` stands for "*". The order of the
// enumerators fixes their numeric values, so new ones belong just before
// `wildcard`.
enum struct Service {
  apigateway, appstream, artifact, autoscaling,
  aws_portal, acm,
  cloudformation, cloudfront, cloudhsm, cloudsearch, cloudtrail, cloudwatch,
  events, logs,
  codebuild, codecommit, codedeploy, codepipeline,
  cognito_idp, cognito_identity, cognito_sync,
  config, datapipeline, dms, devicefarm, directconnect, ds, dynamodb,
  ec2, ecr, ecs, ssm,
  elasticbeanstalk, elasticfilesystem, elasticloadbalancing,
  elasticmapreduce, elastictranscoder, elasticache, es,
  gamelift, glacier, health, iam, importexport, inspector, iot, kms,
  kinesisanalytics, firehose, kinesis,
  lambda, lightsail, machinelearning,
  aws_marketplace, aws_marketplace_management,
  mobileanalytics, mobilehub,
  opsworks, opsworks_cm, polly,
  redshift, rds, route53, route53domains,
  sts, servicecatalog, ses, sns, sqs, s3, swf, sdb, states,
  storagegateway, support, trustedadvisor,
  waf, workmail, workspaces,
  wildcard
};
+ std::string account; + std::string resource; + + ARN() + : partition(Partition::wildcard), service(Service::wildcard) {} + ARN(Partition partition, Service service, std::string region, + std::string account, std::string resource) + : partition(partition), service(service), region(std::move(region)), + account(std::move(account)), resource(std::move(resource)) {} + ARN(const rgw_obj& o); + ARN(const rgw_bucket& b); + ARN(const rgw_bucket& b, const std::string& o); + + static boost::optional<ARN> parse(const std::string& s, + bool wildcard = false); + std::string to_string() const; + + // `this` is the pattern + bool match(const ARN& candidate) const; +}; + +inline std::string to_string(const ARN& a) { + return a.to_string(); +} + +inline std::ostream& operator <<(std::ostream& m, const ARN& a) { + return m << to_string(a); +} + +bool operator ==(const ARN& l, const ARN& r); +bool operator <(const ARN& l, const ARN& r); + +using Address = std::bitset<128>; +struct MaskedIP { + bool v6; + Address addr; + // Since we're mapping IPv6 to IPv4 addresses, we may want to + // consider making the prefix always be in terms of a v6 address + // and just use the v6 bit to rewrite it as a v4 prefix for + // output. + unsigned int prefix; +}; + +std::ostream& operator <<(std::ostream& m, const MaskedIP& ip); +string to_string(const MaskedIP& m); + +inline bool operator ==(const MaskedIP& l, const MaskedIP& r) { + auto shift = std::max((l.v6 ? 128 : 32) - l.prefix, + (r.v6 ? 128 : 32) - r.prefix); + ceph_assert(shift > 0); + return (l.addr >> shift) == (r.addr >> shift); +} + +struct Condition { + TokenID op; + // Originally I was going to use a perfect hash table, but Marcus + // says keys are to be added at run-time not compile time. + + // In future development, use symbol internment. + std::string key; + bool ifexists = false; + // Much to my annoyance there is no actual way to do this in a + // typed way that is compatible with AWS. 
I know this because I've + // seen examples where the same value is used as a string in one + // context and a date in another. + std::vector<std::string> vals; + + Condition() = default; + Condition(TokenID op, const char* s, std::size_t len) : op(op) { + static constexpr char ifexistr[] = "IfExists"; + auto l = static_cast<const char*>(memmem(static_cast<const void*>(s), len, + static_cast<const void*>(ifexistr), + sizeof(ifexistr) -1)); + if (l && ((l + sizeof(ifexistr) - 1 == (s + len)))) { + ifexists = true; + key.assign(s, static_cast<const char*>(l) - s); + } else { + key.assign(s, len); + } + } + + bool eval(const Environment& e) const; + + static boost::optional<double> as_number(const std::string& s) { + std::size_t p = 0; + + try { + double d = std::stod(s, &p); + if (p < s.length()) { + return boost::none; + } + + return d; + } catch (const std::logic_error& e) { + return boost::none; + } + } + + static boost::optional<ceph::real_time> as_date(const std::string& s) { + std::size_t p = 0; + + try { + double d = std::stod(s, &p); + if (p == s.length()) { + return ceph::real_time( + std::chrono::seconds(static_cast<uint64_t>(d)) + + std::chrono::nanoseconds( + static_cast<uint64_t>((d - static_cast<uint64_t>(d)) + * 1000000000))); + } + + return from_iso_8601(boost::string_ref(s), false); + } catch (const std::logic_error& e) { + return boost::none; + } + } + + static boost::optional<bool> as_bool(const std::string& s) { + std::size_t p = 0; + + if (s.empty() || boost::iequals(s, "false")) { + return false; + } + + try { + double d = std::stod(s, &p); + if (p == s.length()) { + return !((d == +0.0) || (d = -0.0) || std::isnan(d)); + } + } catch (const std::logic_error& e) { + // Fallthrough + } + + return true; + } + + static boost::optional<ceph::bufferlist> as_binary(const std::string& s) { + // In a just world + ceph::bufferlist base64; + // I could populate a bufferlist + base64.push_back(buffer::create_static( + s.length(), + 
const_cast<char*>(s.data()))); // Yuck + // From a base64 encoded std::string. + ceph::bufferlist bin; + + try { + base64.decode_base64(bin); + } catch (const ceph::buffer::malformed_input& e) { + return boost::none; + } + return bin; + } + + static boost::optional<MaskedIP> as_network(const std::string& s); + + + struct ci_equal_to : public std::binary_function<const std::string, + const std::string, + bool> { + bool operator ()(const std::string& s1, + const std::string& s2) const { + return boost::iequals(s1, s2); + } + }; + + + template<typename F> + static bool orrible(F&& f, const std::string& c, + const std::vector<std::string>& v) { + for (const auto& d : v) { + if (std::forward<F>(f)(c, d)) { + return true; + } + } + return false; + } + + template<typename F, typename X> + static bool shortible(F&& f, X& x, const std::string& c, + const std::vector<std::string>& v) { + auto xc = std::forward<X>(x)(c); + if (!xc) { + return false; + } + + for (const auto& d : v) { + auto xd = std::forward<X>(x)(d); + if (!xd) { + continue; + } + + if (std::forward<F>(f)(*xc, *xd)) { + return true; + } + } + return false; + } +}; + +std::ostream& operator <<(std::ostream& m, const Condition& c); + +std::string to_string(const Condition& c); + +struct Statement { + boost::optional<std::string> sid = boost::none; + + boost::container::flat_set<rgw::auth::Principal> princ; + boost::container::flat_set<rgw::auth::Principal> noprinc; + + // Every statement MUST provide an effect. I just initialize it to + // deny as defensive programming. 
+ Effect effect = Effect::Deny; + + std::uint64_t action = 0; + std::uint64_t notaction = 0; + + boost::container::flat_set<ARN> resource; + boost::container::flat_set<ARN> notresource; + + std::vector<Condition> conditions; + + Effect eval(const Environment& e, + boost::optional<const rgw::auth::Identity&> ida, + std::uint64_t action, const ARN& resource) const; +}; + +std::ostream& operator <<(ostream& m, const Statement& s); +std::string to_string(const Statement& s); + +struct PolicyParseException : public std::exception { + rapidjson::ParseResult pr; + + PolicyParseException(rapidjson::ParseResult&& pr) + : pr(pr) { } + const char* what() const noexcept override { + return rapidjson::GetParseError_En(pr.Code()); + } +}; + +struct Policy { + std::string text; + Version version = Version::v2008_10_17; + boost::optional<std::string> id = boost::none; + + std::vector<Statement> statements; + + Policy(CephContext* cct, const std::string& tenant, + const bufferlist& text); + + Effect eval(const Environment& e, + boost::optional<const rgw::auth::Identity&> ida, + std::uint64_t action, const ARN& resource) const; +}; + +std::ostream& operator <<(ostream& m, const Policy& p); +std::string to_string(const Policy& p); +} +} + +namespace std { +template<> +struct hash<::rgw::IAM::Service> { + size_t operator()(const ::rgw::IAM::Service& s) const noexcept { + // Invoke a default-constructed hash object for int. 
+ return hash<int>()(static_cast<int>(s)); + } +}; +} + +#endif diff --git a/src/rgw/rgw_iam_policy_keywords.gperf b/src/rgw/rgw_iam_policy_keywords.gperf new file mode 100644 index 00000000000..d37fa6aff65 --- /dev/null +++ b/src/rgw/rgw_iam_policy_keywords.gperf @@ -0,0 +1,127 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +%language=C++ +%define class-name keyword_hash +%define lookup-function-name lookup +%struct-type +struct Keyword { + const char* name; + TokenKind kind; + TokenID id; + uint64_t specific; + bool arrayable; + bool objectable; +}; +%% +# Top-level +# +Version, TokenKind::top, TokenID::Version, 0, false, false +Id, TokenKind::top, TokenID::Id, 0, false, false +Statement, TokenKind::top, TokenID::Statement, 0, true, true +# +# Statement level +# +Sid, TokenKind::statement, TokenID::Sid, 0, false, false +Effect, TokenKind::statement, TokenID::Effect, 0, false, false +Principal, TokenKind::statement, TokenID::Principal, 0, false, true +NotPrincipal, TokenKind::statement, TokenID::NotPrincipal, 0, true, true +Action, TokenKind::statement, TokenID::Action, 0, true, false +NotAction, TokenKind::statement, TokenID::NotAction, 0, true, false +Resource, TokenKind::statement, TokenID::Resource, 0, true, false +NotResource, TokenKind::statement, TokenID::NotResource, 0, true, false +Condition, TokenKind::statement, TokenID::Condition, 0, true, true +# +# Condition operators +# +# String +StringEquals, TokenKind::cond_op, TokenID::StringEquals, (uint64_t) Type::string, true, true +StringNotEquals, TokenKind::cond_op, TokenID::StringNotEquals, (uint64_t) Type::string, true, true +StringEqualsIgnoreCase, TokenKind::cond_op, TokenID::StringEqualsIgnoreCase, (uint64_t) Type::string, true, true +StringNotEqualsIgnoreCase, TokenKind::cond_op, TokenID::StringNotEqualsIgnoreCase, (uint64_t) Type::string, true, true +StringLike, TokenKind::cond_op, TokenID::StringLike, (uint64_t) Type::string, true, true, 
+StringNotLike, TokenKind::cond_op, TokenID::StringNotLike, (uint64_t) Type::string, true, true +# Numeric +NumericEquals, TokenKind::cond_op, TokenID::NumericEquals, (uint64_t) Type::number, true, true +NumericNotEquals, TokenKind::cond_op, TokenID::NumericNotEquals, (uint64_t) Type::number, true, true +NumericLessThan, TokenKind::cond_op, TokenID::NumericLessThan, (uint64_t) Type::number, true, true +NumericLessThanEquals, TokenKind::cond_op, TokenID::NumericLessThanEquals, (uint64_t) Type::number, true, true +NumericGreaterThan, TokenKind::cond_op, TokenID::NumericGreaterThan, (uint64_t) Type::number, true, true +NumericGreaterThanEquals, TokenKind::cond_op, TokenID::NumericGreaterThanEquals, (uint64_t) Type::number, true, true +# Date +DateEquals, TokenKind::cond_op, TokenID::DateEquals, (uint64_t) Type::date, true, true +DateNotEquals, TokenKind::cond_op, TokenID::DateNotEquals, (uint64_t) Type::date, true, true +DateLessThan, TokenKind::cond_op, TokenID::DateLessThan, (uint64_t) Type::date, true, true +DateLessThanEquals, TokenKind::cond_op, TokenID::DateLessThanEquals, (uint64_t) Type::date, true, true +DateGreaterThan, TokenKind::cond_op, TokenID::DateGreaterThan, (uint64_t) Type::date, true, true +DateGreaterThanEquals, TokenKind::cond_op, TokenID::DateGreaterThanEquals, (uint64_t) Type::date, true, true +# Bool +Bool, TokenKind::cond_op, TokenID::Bool, (uint64_t) Type::boolean, true, true +# Binary +BinaryEquals, TokenKind::cond_op, TokenID::BinaryEquals, (uint64_t) Type::binary, true, true +# IP Address +IpAddress, TokenKind::cond_op, TokenID::IpAddress, (uint64_t) Type::ipaddr, true, true +NotIpAddress, TokenKind::cond_op, TokenID::NotIpAddress, (uint64_t) Type::ipaddr, true, true +# Amazon Resource Names +ArnEquals, TokenKind::cond_op, TokenID::ArnEquals, (uint64_t) Type::arn, true, true +ArnNotEquals, TokenKind::cond_op, TokenID::ArnNotEquals, (uint64_t) Type::arn, true, true +ArnLike, TokenKind::cond_op, TokenID::ArnLike, (uint64_t) Type::arn, true, 
true +ArnNotLike, TokenKind::cond_op, TokenID::ArnNotLike, (uint64_t) Type::arn, true, true +# Null +Null, TokenKind::cond_op, TokenID::Null, (uint64_t) Type::null, true, true +# +# Condition keys +# +# AWS +#aws:CurrentTime, TokenKind::cond_key, TokenID::awsCurrentTime, (uint64_t) Type::date, true, false +#aws:EpochTime, TokenKind::cond_key, TokenID::awsEpochTime, (uint64_t) Type::date, true, false +#aws:TokenIssueTime, TokenKind::cond_key, TokenID::awsTokenIssueTime, (uint64_t) Type::date, true, false +#aws:MultiFactorAuthPresent, TokenKind::cond_key, TokenID::awsMultiFactorAuthPresent, (uint64_t) Type::boolean, true, false +#aws:MultiFactorAuthAge, TokenKind::cond_key, TokenID::awsMultiFactorAuthAge, (uint64_t) Type::number, true, false +#aws:PrincipalType, TokenKind::cond_key, TokenID::awsPrincipalType, (uint64_t) Type::string, true, false +#aws:Referer, TokenKind::cond_key, TokenID::awsReferer, (uint64_t) Type::string, true, false +#aws:SecureTransport, TokenKind::cond_key, TokenID::awsSecureTransport, (uint64_t) Type::boolean, true, false +#aws:SourceArn, TokenKind::cond_key, TokenID::awsSourceArn, (uint64_t) Type::arn, true, false +#aws:SourceIp, TokenKind::cond_key, TokenID::awsSourceIp, (uint64_t) Type::ipaddr, true, false +#aws:SourceVpc, TokenKind::cond_key, TokenID::awsSourceVpc, (uint64_t) Type::string, true, false +#aws:SourceVpce, TokenKind::cond_key, TokenID::awsSourceVpce, (uint64_t) Type::string, true, false +#aws:UserAgent, TokenKind::cond_key, TokenID::awsUserAgent, (uint64_t) Type::string, true, false +#aws:userid, TokenKind::cond_key, TokenID::awsuserid, (uint64_t) Type::string, true, false +#aws:username, TokenKind::cond_key, TokenID::awsusername, (uint64_t) Type::string, true, false +# S3 +#s3:x-amz-acl, TokenKind::cond_key, TokenID::s3x_amz_acl, (uint64_t) Type::string, true, false +#s3:x-amz-grant-read, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false +#s3:x-amz-grant-write, TokenKind::cond_key, 
TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false +#s3:x-amz-grant-read-acp, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false +#s3:x-amz-grant-write-acp, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false +#s3:x-amz-grant-full-control, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false +#s3:x-amz-copy-source, TokenKind::cond_key, TokenID::s3x_amz_copy_source, (uint64_t) Type::string, true, false +#s3:x-amz-server-side-encryption, TokenKind::cond_key, TokenID::s3x_amz_server_side_encryption, (uint64_t) Type::boolean, true, false +#s3:x-amz-server-side-encryption-aws-kms-key-id, TokenKind::cond_key, TokenID::s3x_amz_server_side_encryption_aws_kms_key_id, (uint64_t) Type::arn, true, false +#s3:x-amz-metadata-directive, TokenKind::cond_key, TokenID::s3x_amz_metadata_directive, (uint64_t) Type::string, true, false +#s3:x-amz-storage-class, TokenKind::cond_key, TokenID::s3x_amz_storage_class, (uint64_t) Type::string, true, false +#s3:VersionId, TokenKind::cond_key, TokenID::s3VersionId, (uint64_t) Type::string, true, false +#s3:LocationConstraint, TokenKind::cond_key, TokenID::s3LocationConstraint, (uint64_t) Type::string, true, false +#s3:prefix, TokenKind::cond_key, TokenID::s3prefix, (uint64_t) Type::string, true, false +#s3:delimiter, TokenKind::cond_key, TokenID::s3delimiter, (uint64_t) Type::string, true, false +#s3:max-keys, TokenKind::cond_key, TokenID::s3max_keys, (uint64_t) Type::number, true, false +#s3:signatureversion, TokenKind::cond_key, TokenID::s3signatureversion, (uint64_t) Type::string, true, false +#s3:authType, TokenKind::cond_key, TokenID::s3authType, (uint64_t) Type::string, true, false +#s3:signatureAge, TokenKind::cond_key, TokenID::s3signatureAge, (uint64_t) Type::number, true, false +#s3:x-amz-content-sha256, TokenKind::cond_key, TokenID::s3x_amz_content_sha256, (uint64_t) Type::string, true, 
false +# +# Version Keywords +# +2008-10-17, TokenKind::version_key, TokenID::v2008_10_17, (uint64_t) Version::v2008_10_17, false, false +2012-10-17, TokenKind::version_key, TokenID::v2012_10_17, (uint64_t) Version::v2012_10_17, false, false +# +# Effect Keywords +# +Allow, TokenKind::effect_key, TokenID::Allow, (uint64_t) Effect::Allow, false, false +Deny, TokenKind::effect_key, TokenID::Deny, (uint64_t) Effect::Deny, false, false +# +# Principal types +# +AWS, TokenKind::princ_type, TokenID::AWS, 0, true, false +Federated, TokenKind::princ_type, TokenID::Federated, 0, true, false +Service, TokenKind::princ_type, TokenID::Service, 0, true, false +CanonicalUser, TokenKind::princ_type, TokenID::CanonicalUser, 0, true, false diff --git a/src/rgw/rgw_iam_policy_keywords.h b/src/rgw/rgw_iam_policy_keywords.h new file mode 100644 index 00000000000..a0cd34b6286 --- /dev/null +++ b/src/rgw/rgw_iam_policy_keywords.h @@ -0,0 +1,139 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RGW_POLICY_S3V2_KEYWORDS_H +#define CEPH_RGW_POLICY_S3V2_KEYWORDS_H + +namespace rgw { +namespace IAM { + +enum class TokenKind { + pseudo, top, statement, cond_op, cond_key, version_key, effect_key, + princ_type +}; + +enum class TokenID { + /// Pseudo-token + Top, + + /// Top-level tokens + Version, Id, Statement, + + /// Statement level tokens + Sid, Effect, Principal, NotPrincipal, Action, NotAction, + Resource, NotResource, Condition, + + /// Condition Operators! + /// Any of these, except Null, can have an IfExists variant. + + // String! + StringEquals, StringNotEquals, StringEqualsIgnoreCase, + StringNotEqualsIgnoreCase, StringLike, StringNotLike, + + // Numeric! + NumericEquals, NumericNotEquals, NumericLessThan, NumericLessThanEquals, + NumericGreaterThan, NumericGreaterThanEquals, + + // Date! + DateEquals, DateNotEquals, DateLessThan, DateLessThanEquals, + DateGreaterThan, DateGreaterThanEquals, + + // Bool! 
+ Bool, + + // Binary! + BinaryEquals, + + // IP Address! + IpAddress, NotIpAddress, + + // Amazon Resource Names! (Does S3 need this?) + ArnEquals, ArnNotEquals, ArnLike, ArnNotLike, + + // Null! + Null, + +#if 0 // Keys are done at runtime now + + /// Condition Keys! + awsCurrentTime, + awsEpochTime, + awsTokenIssueTime, + awsMultiFactorAuthPresent, + awsMultiFactorAuthAge, + awsPrincipalType, + awsReferer, + awsSecureTransport, + awsSourceArn, + awsSourceIp, + awsSourceVpc, + awsSourceVpce, + awsUserAgent, + awsuserid, + awsusername, + s3x_amz_acl, + s3x_amz_grant_permission, + s3x_amz_copy_source, + s3x_amz_server_side_encryption, + s3x_amz_server_side_encryption_aws_kms_key_id, + s3x_amz_metadata_directive, + s3x_amz_storage_class, + s3VersionId, + s3LocationConstraint, + s3prefix, + s3delimiter, + s3max_keys, + s3signatureversion, + s3authType, + s3signatureAge, + s3x_amz_content_sha256, +#else + CondKey, +#endif + + /// + /// Versions! + /// + v2008_10_17, + v2012_10_17, + + /// + /// Effects! + /// + Allow, + Deny, + + /// Principal Types! 
+ AWS, + Federated, + Service, + CanonicalUser +}; + + +enum class Version { + v2008_10_17, + v2012_10_17 +}; + + +enum class Effect { + Allow, + Deny, + Pass +}; + +enum class Type { + string, + number, + date, + boolean, + binary, + ipaddr, + arn, + null +}; +} +} + +#endif // CEPH_RGW_POLICY_S3V2_KEYWORDS_H diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc index 067ad6fadbb..2a183b59195 100644 --- a/src/rgw/rgw_json_enc.cc +++ b/src/rgw/rgw_json_enc.cc @@ -1290,6 +1290,7 @@ void rgw_meta_sync_marker::decode_json(JSONObj *obj) utime_t ut; JSONDecoder::decode_json("timestamp", ut, obj); timestamp = ut.to_real_time(); + JSONDecoder::decode_json("realm_epoch", realm_epoch, obj); } void rgw_meta_sync_marker::dump(Formatter *f) const @@ -1300,6 +1301,7 @@ void rgw_meta_sync_marker::dump(Formatter *f) const encode_json("total_entries", total_entries, f); encode_json("pos", pos, f); encode_json("timestamp", utime_t(timestamp), f); + encode_json("realm_epoch", realm_epoch, f); } void rgw_meta_sync_status::decode_json(JSONObj *obj) diff --git a/src/rgw/rgw_keystone.cc b/src/rgw/rgw_keystone.cc index 933308240e2..3294380ba10 100644 --- a/src/rgw/rgw_keystone.cc +++ b/src/rgw/rgw_keystone.cc @@ -613,7 +613,7 @@ int TokenCache::RevokeThread::check_revoked() bool TokenCache::going_down() const { - return (down_flag.read() != 0); + return down_flag; } void* TokenCache::RevokeThread::entry() diff --git a/src/rgw/rgw_keystone.h b/src/rgw/rgw_keystone.h index 5bacfb0da5e..df5650c5927 100644 --- a/src/rgw/rgw_keystone.h +++ b/src/rgw/rgw_keystone.h @@ -13,6 +13,8 @@ #include "rgw_http_client.h" #include "common/Cond.h" +#include <atomic> + int rgw_open_cms_envelope(CephContext *cct, const std::string& src, std::string& dst); /* out */ @@ -216,7 +218,7 @@ class TokenCache { list<string>::iterator lru_iter; }; - atomic_t down_flag; + std::atomic<bool> down_flag = { false }; class RevokeThread : public Thread { friend class TokenCache; @@ -271,7 +273,7 @@ class 
TokenCache { } ~TokenCache() { - down_flag.set(1); + down_flag = true; revocator.stop(); revocator.join(); diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc index 61d4cc94427..b70d2596f26 100644 --- a/src/rgw/rgw_lc.cc +++ b/src/rgw/rgw_lc.cc @@ -2,11 +2,13 @@ #include <iostream> #include <map> +#include <boost/algorithm/string/split.hpp> +#include <boost/algorithm/string.hpp> + #include "common/Formatter.h" #include <common/errno.h> #include "auth/Crypto.h" #include "cls/rgw/cls_rgw_client.h" -#include "cls/refcount/cls_refcount_client.h" #include "cls/lock/cls_lock_client.h" #include "rgw_common.h" #include "rgw_bucket.h" @@ -198,21 +200,6 @@ bool RGWLC::if_already_run_today(time_t& start_date) return false; } -static std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) { - std::stringstream ss(s); - std::string item; - while (std::getline(ss, item, delim)) { - elems.push_back(item); - } - return elems; -} - -static std::vector<std::string> split(const std::string &s, char delim) { - std::vector<std::string> elems; - split(s, delim, elems); - return elems; -} - int RGWLC::bucket_lc_prepare(int index) { map<string, int > entries; @@ -324,7 +311,7 @@ int RGWLC::bucket_lc_process(string& shard_id) vector<rgw_bucket_dir_entry> objs; RGWObjectCtx obj_ctx(store); vector<std::string> result; - result = split(shard_id, ':'); + boost::split(result, shard_id, boost::is_any_of(":")); string bucket_tenant = result[0]; string bucket_name = result[1]; string bucket_id = result[2]; @@ -496,8 +483,7 @@ int RGWLC::bucket_lc_process(string& shard_id) return ret; } -int RGWLC::bucket_lc_post(int index, int max_lock_sec, cls_rgw_lc_obj_head& head, - pair<string, int >& entry, int& result) +int RGWLC::bucket_lc_post(int index, int max_lock_sec, pair<string, int >& entry, int& result) { utime_t lock_duration(cct->_conf->rgw_lc_lock_max_time, 0); @@ -637,7 +623,7 @@ int RGWLC::process(int index, int max_lock_secs) } 
l.unlock(&store->lc_pool_ctx, obj_names[index]); ret = bucket_lc_process(entry.first); - ret = bucket_lc_post(index, max_lock_secs, head, entry, ret); + bucket_lc_post(index, max_lock_secs, entry, ret); return 0; exit: l.unlock(&store->lc_pool_ctx, obj_names[index]); @@ -655,7 +641,7 @@ void RGWLC::start_processor() void RGWLC::stop_processor() { - down_flag.set(1); + down_flag = true; if (worker) { worker->stop(); worker->join(); @@ -672,7 +658,7 @@ void RGWLC::LCWorker::stop() bool RGWLC::going_down() { - return (down_flag.read() != 0); + return down_flag; } bool RGWLC::LCWorker::should_work(utime_t& now) diff --git a/src/rgw/rgw_lc.h b/src/rgw/rgw_lc.h index 61edd78aac7..480ba3fe513 100644 --- a/src/rgw/rgw_lc.h +++ b/src/rgw/rgw_lc.h @@ -9,7 +9,6 @@ #include "common/debug.h" #include "include/types.h" -#include "include/atomic.h" #include "include/rados/librados.hpp" #include "common/Mutex.h" #include "common/Cond.h" @@ -19,7 +18,8 @@ #include "rgw_multi.h" #include "cls/rgw/cls_rgw_types.h" -using namespace std; +#include <atomic> + #define HASH_PRIME 7877 #define MAX_ID_LEN 255 static string lc_oid_prefix = "lc"; @@ -227,7 +227,7 @@ class RGWLC { RGWRados *store; int max_objs; string *obj_names; - atomic_t down_flag; + std::atomic<bool> down_flag = { false }; string cookie; class LCWorker : public Thread { @@ -261,8 +261,7 @@ class RGWLC { int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map); int bucket_lc_prepare(int index); int bucket_lc_process(string& shard_id); - int bucket_lc_post(int index, int max_lock_sec, cls_rgw_lc_obj_head& head, - pair<string, int >& entry, int& result); + int bucket_lc_post(int index, int max_lock_sec, pair<string, int >& entry, int& result); bool going_down(); void start_processor(); void stop_processor(); diff --git a/src/rgw/rgw_lc_s3.h b/src/rgw/rgw_lc_s3.h index c969ac979c9..52f6987e487 100644 --- a/src/rgw/rgw_lc_s3.h +++ b/src/rgw/rgw_lc_s3.h @@ -6,16 +6,10 @@ #include 
<iostream> #include <include/types.h> -#include <expat.h> - #include "include/str_list.h" #include "rgw_lc.h" #include "rgw_xml.h" - - -using namespace std; - class LCID_S3 : public XMLObj { public: diff --git a/src/rgw/rgw_ldap.cc b/src/rgw/rgw_ldap.cc index d07b295153e..a39afa6c081 100644 --- a/src/rgw/rgw_ldap.cc +++ b/src/rgw/rgw_ldap.cc @@ -94,6 +94,7 @@ namespace rgw { if (ret != LDAP_SUCCESS) { ldout(g_ceph_context, 10) << __func__ << " simple_bind failed uid=" << uid + << " ldap err=" << ret << dendl; } ldap_memfree(dn); diff --git a/src/rgw/rgw_loadgen_process.cc b/src/rgw/rgw_loadgen_process.cc index 7f003facbb7..23e6fefca73 100644 --- a/src/rgw/rgw_loadgen_process.cc +++ b/src/rgw/rgw_loadgen_process.cc @@ -13,6 +13,8 @@ #include "rgw_loadgen.h" #include "rgw_client_io.h" +#include <atomic> + #define dout_subsys ceph_subsys_rgw extern void signal_shutdown(); @@ -37,7 +39,7 @@ void RGWLoadGenProcess::run() vector<string> buckets(num_buckets); - atomic_t failed; + std::atomic<long int> failed = { 0 }; for (i = 0; i < num_buckets; i++) { buckets[i] = "/loadgen"; @@ -51,7 +53,7 @@ void RGWLoadGenProcess::run() string *objs = new string[num_objs]; - if (failed.read()) { + if (failed) { derr << "ERROR: bucket creation failed" << dendl; goto done; } @@ -69,7 +71,7 @@ void RGWLoadGenProcess::run() checkpoint(); - if (failed.read()) { + if (failed) { derr << "ERROR: bucket creation failed" << dendl; goto done; } @@ -102,7 +104,7 @@ done: void RGWLoadGenProcess::gen_request(const string& method, const string& resource, - int content_length, atomic_t* fail_flag) + int content_length, std::atomic<long int>* fail_flag) { RGWLoadGenRequest* req = new RGWLoadGenRequest(store->get_new_req_id(), method, resource, @@ -138,7 +140,7 @@ void RGWLoadGenProcess::handle_request(RGWRequest* r) dout(20) << "process_request() returned " << ret << dendl; if (req->fail_flag) { - req->fail_flag->inc(); + ++(*req->fail_flag); /* fail_flag is a std::atomic<long int>*: increment the pointed-to counter, not the pointer */ } } diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc index 
153ddb060d7..51dbd1bbe93 100644 --- a/src/rgw/rgw_log.cc +++ b/src/rgw/rgw_log.cc @@ -218,7 +218,7 @@ static void log_usage(struct req_state *s, const string& op_name) rgw_usage_data data(bytes_sent, bytes_received); data.ops = 1; - if (!error) + if (!s->is_err()) data.successful_ops = 1; entry.add(op_name, data); diff --git a/src/rgw/rgw_log.h b/src/rgw/rgw_log.h index 5e612b7ca22..25e99e096de 100644 --- a/src/rgw/rgw_log.h +++ b/src/rgw/rgw_log.h @@ -3,7 +3,6 @@ #ifndef CEPH_RGW_LOG_H #define CEPH_RGW_LOG_H - #include <boost/container/flat_map.hpp> #include "rgw_common.h" #include "include/utime.h" diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc index 1bec56553a6..01ef5556c22 100644 --- a/src/rgw/rgw_main.cc +++ b/src/rgw/rgw_main.cc @@ -53,13 +53,14 @@ #include "rgw_request.h" #include "rgw_process.h" #include "rgw_frontend.h" -#if defined(WITH_RADOSGW_ASIO_FRONTEND) +#if defined(WITH_RADOSGW_BEAST_FRONTEND) #include "rgw_asio_frontend.h" -#endif /* WITH_RADOSGW_ASIO_FRONTEND */ +#endif /* WITH_RADOSGW_BEAST_FRONTEND */ #include <map> #include <string> #include <vector> +#include <atomic> #include "include/types.h" #include "common/BackTrace.h" @@ -77,11 +78,11 @@ static sig_t sighandler_alrm; class RGWProcess; static int signal_fd[2] = {0, 0}; -static atomic_t disable_signal_fd; +static std::atomic<int64_t> disable_signal_fd = { 0 }; void signal_shutdown() { - if (!disable_signal_fd.read()) { + if (!disable_signal_fd) { int val = 0; int ret = write(signal_fd[0], (char *)&val, sizeof(val)); if (ret < 0) { @@ -470,8 +471,6 @@ int main(int argc, const char **argv) RGWFrontend *fe = NULL; if (framework == "civetweb" || framework == "mongoose") { - int port; - config->get_val("port", 80, &port); std::string uri_prefix; config->get_val("prefix", "", &uri_prefix); @@ -489,9 +488,9 @@ int main(int argc, const char **argv) fe = new RGWLoadGenFrontend(env, config); } -#if defined(WITH_RADOSGW_ASIO_FRONTEND) - else if ((framework == "asio") && - 
cct->check_experimental_feature_enabled("rgw-asio-frontend")) { +#if defined(WITH_RADOSGW_BEAST_FRONTEND) + else if ((framework == "beast") && + cct->check_experimental_feature_enabled("rgw-beast-frontend")) { int port; config->get_val("port", 80, &port); std::string uri_prefix; @@ -499,7 +498,7 @@ int main(int argc, const char **argv) RGWProcessEnv env{ store, &rest, olog, port, uri_prefix, auth_registry }; fe = new RGWAsioFrontend(env); } -#endif /* WITH_RADOSGW_ASIO_FRONTEND */ +#endif /* WITH_RADOSGW_BEAST_FRONTEND */ #if defined(WITH_RADOSGW_FCGI_FRONTEND) else if (framework == "fastcgi" || framework == "fcgi") { std::string uri_prefix; diff --git a/src/rgw/rgw_meta_sync_status.h b/src/rgw/rgw_meta_sync_status.h index e913e8ffb21..e34bb05be1d 100644 --- a/src/rgw/rgw_meta_sync_status.h +++ b/src/rgw/rgw_meta_sync_status.h @@ -55,28 +55,33 @@ struct rgw_meta_sync_marker { uint64_t total_entries; uint64_t pos; real_time timestamp; + epoch_t realm_epoch{0}; //< realm_epoch of period marker rgw_meta_sync_marker() : state(FullSync), total_entries(0), pos(0) {} void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); ::encode(state, bl); ::encode(marker, bl); ::encode(next_step_marker, bl); ::encode(total_entries, bl); ::encode(pos, bl); ::encode(timestamp, bl); + ::encode(realm_epoch, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); + DECODE_START(2, bl); ::decode(state, bl); ::decode(marker, bl); ::decode(next_step_marker, bl); ::decode(total_entries, bl); ::decode(pos, bl); ::decode(timestamp, bl); + if (struct_v >= 2) { + ::decode(realm_epoch, bl); + } DECODE_FINISH(bl); } diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc index e26201bc3be..0a0f3dec324 100644 --- a/src/rgw/rgw_metadata.cc +++ b/src/rgw/rgw_metadata.cc @@ -313,28 +313,6 @@ public: static RGWMetadataTopHandler md_top_handler; -static const std::string mdlog_history_oid = "meta.history"; - -struct 
RGWMetadataLogHistory { - epoch_t oldest_realm_epoch; - std::string oldest_period_id; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - ::encode(oldest_realm_epoch, bl); - ::encode(oldest_period_id, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& p) { - DECODE_START(1, p); - ::decode(oldest_realm_epoch, p); - ::decode(oldest_period_id, p); - DECODE_FINISH(p); - } -}; -WRITE_CLASS_ENCODER(RGWMetadataLogHistory) - - RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store) { @@ -351,15 +329,18 @@ RGWMetadataManager::~RGWMetadataManager() handlers.clear(); } +const std::string RGWMetadataLogHistory::oid = "meta.history"; + namespace { -int read_history(RGWRados *store, RGWMetadataLogHistory *state) +int read_history(RGWRados *store, RGWMetadataLogHistory *state, + RGWObjVersionTracker *objv_tracker) { RGWObjectCtx ctx{store}; auto& pool = store->get_zone_params().log_pool; - const auto& oid = mdlog_history_oid; + const auto& oid = RGWMetadataLogHistory::oid; bufferlist bl; - int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, nullptr, nullptr); + int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, objv_tracker, nullptr); if (ret < 0) { return ret; } @@ -375,19 +356,141 @@ int read_history(RGWRados *store, RGWMetadataLogHistory *state) } int write_history(RGWRados *store, const RGWMetadataLogHistory& state, - bool exclusive = false) + RGWObjVersionTracker *objv_tracker, bool exclusive = false) { bufferlist bl; state.encode(bl); auto& pool = store->get_zone_params().log_pool; - const auto& oid = mdlog_history_oid; + const auto& oid = RGWMetadataLogHistory::oid; return rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), - exclusive, nullptr, real_time{}); + exclusive, objv_tracker, real_time{}); } using Cursor = RGWPeriodHistory::Cursor; +/// read the mdlog history and use it to initialize the given cursor +class ReadHistoryCR : public RGWCoroutine { + RGWRados *store; + 
Cursor *cursor; + RGWObjVersionTracker *objv_tracker; + RGWMetadataLogHistory state; + public: + ReadHistoryCR(RGWRados *store, Cursor *cursor, + RGWObjVersionTracker *objv_tracker) + : RGWCoroutine(store->ctx()), store(store), cursor(cursor), + objv_tracker(objv_tracker) + {} + + int operate() { + reenter(this) { + yield { + rgw_raw_obj obj{store->get_zone_params().log_pool, + RGWMetadataLogHistory::oid}; + constexpr bool empty_on_enoent = false; + + using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>; + call(new ReadCR(store->get_async_rados(), store, obj, + &state, empty_on_enoent, objv_tracker)); + } + if (retcode < 0) { + ldout(cct, 1) << "failed to read mdlog history: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + *cursor = store->period_history->lookup(state.oldest_realm_epoch); + if (!*cursor) { + return set_cr_error(cursor->get_error()); + } + + ldout(cct, 10) << "read mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + return set_cr_done(); + } + return 0; + } +}; + +/// write the given cursor to the mdlog history +class WriteHistoryCR : public RGWCoroutine { + RGWRados *store; + Cursor cursor; + RGWObjVersionTracker *objv; + RGWMetadataLogHistory state; + public: + WriteHistoryCR(RGWRados *store, const Cursor& cursor, + RGWObjVersionTracker *objv) + : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv) + {} + + int operate() { + reenter(this) { + state.oldest_period_id = cursor.get_period().get_id(); + state.oldest_realm_epoch = cursor.get_epoch(); + + yield { + rgw_raw_obj obj{store->get_zone_params().log_pool, + RGWMetadataLogHistory::oid}; + + using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>; + call(new WriteCR(store->get_async_rados(), store, obj, state, objv)); + } + if (retcode < 0) { + ldout(cct, 1) << "failed to write mdlog history: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + 
+ ldout(cct, 10) << "wrote mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + return set_cr_done(); + } + return 0; + } +}; + +/// update the mdlog history to reflect trimmed logs +class TrimHistoryCR : public RGWCoroutine { + RGWRados *store; + const Cursor cursor; //< cursor to trimmed period + RGWObjVersionTracker *objv; //< to prevent racing updates + Cursor next; //< target cursor for oldest log period + Cursor existing; //< existing cursor read from disk + + public: + TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv) + : RGWCoroutine(store->ctx()), + store(store), cursor(cursor), objv(objv), next(cursor) + { + next.next(); // advance past cursor + } + + int operate() { + reenter(this) { + // read an existing history, and write the new history if it's newer + yield call(new ReadHistoryCR(store, &existing, objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + // reject older trims with ECANCELED + if (cursor.get_epoch() < existing.get_epoch()) { + ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch() + << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl; + return set_cr_error(-ECANCELED); + } + // overwrite with updated history + yield call(new WriteHistoryCR(store, next, objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } +}; + // traverse all the way back to the beginning of the period history, and // return a cursor to the first period in a fully attached history Cursor find_oldest_period(RGWRados *store) @@ -431,7 +534,8 @@ Cursor RGWMetadataManager::init_oldest_log_period() { // read the mdlog history RGWMetadataLogHistory state; - int ret = read_history(store, &state); + RGWObjVersionTracker objv; + int ret = read_history(store, &state, &objv); if (ret == -ENOENT) { // initialize the mdlog history and write it @@ -446,7 +550,7 @@ Cursor 
RGWMetadataManager::init_oldest_log_period() state.oldest_period_id = cursor.get_period().get_id(); constexpr bool exclusive = true; // don't overwrite - int ret = write_history(store, state, exclusive); + int ret = write_history(store, state, &objv, exclusive); if (ret < 0 && ret != -EEXIST) { ldout(cct, 1) << "failed to write mdlog history: " << cpp_strerror(ret) << dendl; @@ -486,7 +590,7 @@ Cursor RGWMetadataManager::init_oldest_log_period() Cursor RGWMetadataManager::read_oldest_log_period() const { RGWMetadataLogHistory state; - int ret = read_history(store, &state); + int ret = read_history(store, &state, nullptr); if (ret < 0) { ldout(store->ctx(), 1) << "failed to read mdlog history: " << cpp_strerror(ret) << dendl; @@ -500,6 +604,18 @@ Cursor RGWMetadataManager::read_oldest_log_period() const return store->period_history->lookup(state.oldest_realm_epoch); } +RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period, + RGWObjVersionTracker *objv) const +{ + return new ReadHistoryCR(store, period, objv); +} + +RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period, + RGWObjVersionTracker *objv) const +{ + return new TrimHistoryCR(store, period, objv); +} + int RGWMetadataManager::init(const std::string& current_period) { // open a log for the current period diff --git a/src/rgw/rgw_metadata.h b/src/rgw/rgw_metadata.h index 8b7526399a8..4d077e8f888 100644 --- a/src/rgw/rgw_metadata.h +++ b/src/rgw/rgw_metadata.h @@ -18,6 +18,7 @@ class RGWRados; +class RGWCoroutine; class JSONObj; struct RGWObjVersionTracker; @@ -265,6 +266,27 @@ struct RGWMetadataLogData { }; WRITE_CLASS_ENCODER(RGWMetadataLogData) +struct RGWMetadataLogHistory { + epoch_t oldest_realm_epoch; + std::string oldest_period_id; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(oldest_realm_epoch, bl); + ::encode(oldest_period_id, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& p) { + DECODE_START(1, p); + 
::decode(oldest_realm_epoch, p); + ::decode(oldest_period_id, p); + DECODE_FINISH(p); + } + + static const std::string oid; +}; +WRITE_CLASS_ENCODER(RGWMetadataLogHistory) + class RGWMetadataManager { map<string, RGWMetadataHandler *> handlers; CephContext *cct; @@ -303,6 +325,16 @@ public: /// period history RGWPeriodHistory::Cursor read_oldest_log_period() const; + /// read the oldest log period asynchronously and write its result to the + /// given cursor pointer + RGWCoroutine* read_oldest_log_period_cr(RGWPeriodHistory::Cursor *period, + RGWObjVersionTracker *objv) const; + + /// try to advance the oldest log period when the given period is trimmed, + /// passing objv to the history read and write to prevent racing updates + RGWCoroutine* trim_log_period_cr(RGWPeriodHistory::Cursor period, + RGWObjVersionTracker *objv) const; + /// find or create the metadata log for the given period RGWMetadataLog* get_log(const std::string& period); diff --git a/src/rgw/rgw_object_expirer_core.cc b/src/rgw/rgw_object_expirer_core.cc index 3bc033f0a68..fb0444186c9 100644 --- a/src/rgw/rgw_object_expirer_core.cc +++ b/src/rgw/rgw_object_expirer_core.cc @@ -230,7 +230,7 @@ bool RGWObjectExpirer::inspect_all_shards(const utime_t& last_run, bool RGWObjectExpirer::going_down() { - return (down_flag.read() != 0); + return down_flag; } void RGWObjectExpirer::start_processor() @@ -241,7 +241,7 @@ void RGWObjectExpirer::start_processor() void RGWObjectExpirer::stop_processor() { - down_flag.set(1); + down_flag = true; if (worker) { worker->stop(); worker->join(); diff --git a/src/rgw/rgw_object_expirer_core.h b/src/rgw/rgw_object_expirer_core.h index 83f0dd30754..6fe8d1410bf 100644 --- a/src/rgw/rgw_object_expirer_core.h +++ b/src/rgw/rgw_object_expirer_core.h @@ -8,6 +8,7 @@ #include <iostream> #include <sstream> #include <string> +#include <atomic> #include "auth/Crypto.h" @@ -37,6 +38,8 @@ #include "rgw_usage.h" #include "rgw_replica_log.h" +#include <atomic> + class RGWObjectExpirer { protected: RGWRados 
*store; @@ -65,7 +68,7 @@ protected: }; OEWorker *worker; - atomic_t down_flag; + std::atomic<bool> down_flag = { false }; public: explicit RGWObjectExpirer(RGWRados *_store) diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 465f1e82b2d..6ddd6748a1b 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -3,12 +3,15 @@ #include <errno.h> #include <stdlib.h> +#include <system_error> #include <unistd.h> #include <sstream> #include <boost/algorithm/string/predicate.hpp> #include <boost/optional.hpp> +#include <boost/utility/in_place_factory.hpp> +#include <boost/bind.hpp> #include "common/Clock.h" #include "common/armor.h" @@ -43,6 +46,7 @@ #include "compressor/Compressor.h" +#include "rgw_acl_swift.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw @@ -50,12 +54,21 @@ using namespace std; using namespace librados; using ceph::crypto::MD5; +using boost::optional; +using boost::none; + +using rgw::IAM::ARN; +using rgw::IAM::Effect; +using rgw::IAM::Policy; +using rgw::IAM::Policy; static string mp_ns = RGW_OBJ_NS_MULTIPART; static string shadow_ns = RGW_OBJ_NS_SHADOW; -static int forward_request_to_master(struct req_state *s, obj_version *objv, RGWRados *store, bufferlist& in_data, JSONParser *jp); +static void forward_req_info(CephContext *cct, req_info& info, const std::string& bucket_name); +static int forward_request_to_master(struct req_state *s, obj_version *objv, RGWRados *store, + bufferlist& in_data, JSONParser *jp, req_info *forward_info = nullptr); static MultipartMetaFilter mp_filter; @@ -137,9 +150,9 @@ static int decode_policy(CephContext *cct, static int get_user_policy_from_attr(CephContext * const cct, - RGWRados * const store, - map<string, bufferlist>& attrs, - RGWAccessControlPolicy& policy /* out */) + RGWRados * const store, + map<string, bufferlist>& attrs, + RGWAccessControlPolicy& policy /* out */) { auto aiter = attrs.find(RGW_ATTR_ACL); if (aiter != attrs.end()) { @@ -155,11 +168,11 @@ static int 
get_user_policy_from_attr(CephContext * const cct, } static int get_bucket_instance_policy_from_attr(CephContext *cct, - RGWRados *store, - RGWBucketInfo& bucket_info, - map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy *policy, - rgw_raw_obj& obj) + RGWRados *store, + RGWBucketInfo& bucket_info, + map<string, bufferlist>& bucket_attrs, + RGWAccessControlPolicy *policy, + rgw_raw_obj& obj) { map<string, bufferlist>::iterator aiter = bucket_attrs.find(RGW_ATTR_ACL); @@ -181,12 +194,12 @@ static int get_bucket_instance_policy_from_attr(CephContext *cct, } static int get_obj_policy_from_attr(CephContext *cct, - RGWRados *store, - RGWObjectCtx& obj_ctx, - RGWBucketInfo& bucket_info, - map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy *policy, - rgw_obj& obj) + RGWRados *store, + RGWObjectCtx& obj_ctx, + RGWBucketInfo& bucket_info, + map<string, bufferlist>& bucket_attrs, + RGWAccessControlPolicy *policy, + rgw_obj& obj) { bufferlist bl; int ret = 0; @@ -221,15 +234,27 @@ static int get_obj_policy_from_attr(CephContext *cct, * Returns: 0 on success, -ERR# otherwise. 
*/ static int get_bucket_policy_from_attr(CephContext *cct, - RGWRados *store, - RGWBucketInfo& bucket_info, - map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy *policy) + RGWRados *store, + RGWBucketInfo& bucket_info, + map<string, bufferlist>& bucket_attrs, + RGWAccessControlPolicy *policy) { rgw_raw_obj instance_obj; store->get_bucket_instance_obj(bucket_info.bucket, instance_obj); return get_bucket_instance_policy_from_attr(cct, store, bucket_info, bucket_attrs, - policy, instance_obj); + policy, instance_obj); +} + +static optional<Policy> get_iam_policy_from_attr(CephContext* cct, + RGWRados* store, + map<string, bufferlist>& attrs, + const string& tenant) { + auto i = attrs.find(RGW_ATTR_IAM_POLICY); + if (i != attrs.end()) { + return Policy(cct, tenant, i->second); + } else { + return none; + } } static int get_obj_attrs(RGWRados *store, struct req_state *s, rgw_obj& obj, map<string, bufferlist>& attrs) @@ -238,7 +263,6 @@ static int get_obj_attrs(RGWRados *store, struct req_state *s, rgw_obj& obj, map RGWRados::Object::Read read_op(&op_target); read_op.params.attrs = &attrs; - read_op.params.perr = &s->err; return read_op.prepare(); } @@ -250,7 +274,6 @@ static int modify_obj_attr(RGWRados *store, struct req_state *s, rgw_obj& obj, c RGWRados::Object::Read read_op(&op_target); read_op.params.attrs = &attrs; - read_op.params.perr = &s->err; int r = read_op.prepare(); if (r < 0) { @@ -302,7 +325,8 @@ static int read_obj_policy(RGWRados *store, struct req_state *s, RGWBucketInfo& bucket_info, map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy *policy, + RGWAccessControlPolicy* acl, + optional<Policy>& policy, rgw_bucket& bucket, rgw_obj_key& object) { @@ -324,9 +348,11 @@ static int read_obj_policy(RGWRados *store, } else { obj = rgw_obj(bucket, object); } + policy = get_iam_policy_from_attr(s->cct, store, bucket_attrs, bucket.tenant); + RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx); int ret = 
get_obj_policy_from_attr(s->cct, store, *obj_ctx, - bucket_info, bucket_attrs, policy, obj); + bucket_info, bucket_attrs, acl, obj); if (ret == -ENOENT) { /* object does not exist checking the bucket's ACL to make sure that we send a proper error code */ @@ -361,7 +387,7 @@ int rgw_build_bucket_policies(RGWRados* store, struct req_state* s) int ret = 0; rgw_obj_key obj; RGWUserInfo bucket_owner_info; - RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx); + RGWObjectCtx obj_ctx(store); string bi = s->info.args.get(RGW_SYS_PARAM_PREFIX "bucket-instance"); if (!bi.empty()) { @@ -374,8 +400,13 @@ int rgw_build_bucket_policies(RGWRados* store, struct req_state* s) if(s->dialect.compare("s3") == 0) { s->bucket_acl = new RGWAccessControlPolicy_S3(s->cct); } else if(s->dialect.compare("swift") == 0) { - s->user_acl = std::unique_ptr<RGWAccessControlPolicy>( - new RGWAccessControlPolicy_SWIFTAcct(s->cct)); + /* We aren't allocating the account policy for those operations using + * the Swift's infrastructure that don't really need req_state::user. + * Typical example here is the implementation of /info. */ + if (!s->user->user_id.empty()) { + s->user_acl = std::unique_ptr<RGWAccessControlPolicy>( + new RGWAccessControlPolicy_SWIFTAcct(s->cct)); + } s->bucket_acl = new RGWAccessControlPolicy_SWIFT(s->cct); } else { s->bucket_acl = new RGWAccessControlPolicy(s->cct); @@ -497,6 +528,16 @@ int rgw_build_bucket_policies(RGWRados* store, struct req_state* s) } } + try { + s->iam_policy = get_iam_policy_from_attr(s->cct, store, s->bucket_attrs, + s->bucket_tenant); + } catch (const std::exception& e) { + // Really this is a can't happen condition. We parse the policy + // when it's given to us, so perhaps we should abort or otherwise + // raise bloody murder. 
+ lderr(s->cct) << "Error reading IAM Policy: " << e.what() << dendl; + ret = -EACCES; + } return ret; } @@ -524,12 +565,72 @@ int rgw_build_object_policies(RGWRados *store, struct req_state *s, if (prefetch_data) { store->set_prefetch_data(s->obj_ctx, obj); } - ret = read_obj_policy(store, s, s->bucket_info, s->bucket_attrs, s->object_acl, s->bucket, s->object); + ret = read_obj_policy(store, s, s->bucket_info, s->bucket_attrs, s->object_acl, s->iam_policy, s->bucket, s->object); } return ret; } +rgw::IAM::Environment rgw_build_iam_environment(RGWRados* store, + struct req_state* s) +{ + rgw::IAM::Environment e; + const auto& m = s->info.env->get_map(); + auto t = ceph::real_clock::now(); + e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:CurrentTime"), + std::forward_as_tuple(std::to_string( + ceph::real_clock::to_time_t(t)))); + e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:EpochTime"), + std::forward_as_tuple(ceph::to_iso_8601(t))); + // TODO: This is fine for now, but once we have STS we'll need to + // look and see. Also this won't work with the IdentityApplier + // model, since we need to know the actual credential. 
+ e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:PrincipalType"), + std::forward_as_tuple("User")); + + auto i = m.find("HTTP_REFERER"); + if (i != m.end()) { + e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:Referer"), + std::forward_as_tuple(i->second)); + } + + // These seem to be the semantics, judging from rest_rgw_s3.cc + i = m.find("SERVER_PORT_SECURE"); + if (i != m.end()) { + e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:SecureTransport"), + std::forward_as_tuple("true")); + } + + i = m.find("HTTP_HOST"); + if (i != m.end()) { + e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:SourceIp"), + std::forward_as_tuple(i->second)); + } + + i = m.find("HTTP_USER_AGENT"); { + if (i != m.end()) + e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:UserAgent"), + std::forward_as_tuple(i->second)); + } + + if (s->user) { + // What to do about aws::userid? One can have multiple access + // keys so that isn't really suitable. Do we have a durable + // identifier that can persist through name changes? 
+ e.emplace(std::piecewise_construct, + std::forward_as_tuple("aws:username"), + std::forward_as_tuple(s->user->user_id.id)); + } + return e; +} + static void rgw_bucket_object_pre_exec(struct req_state *s) { if (s->expect_cont) @@ -546,7 +647,21 @@ int RGWGetObj::verify_permission() store->set_prefetch_data(s->obj_ctx, obj); } - if (!verify_object_permission(s, RGW_PERM_READ)) { + if (torrent.get_flag()) { + if (obj.key.instance.empty()) { + action = rgw::IAM::s3GetObjectTorrent; + } else { + action = rgw::IAM::s3GetObjectVersionTorrent; + } + } else { + if (obj.key.instance.empty()) { + action = rgw::IAM::s3GetObject; + } else { + action = rgw::IAM::s3GetObjectVersion; + } + } + + if (!verify_object_permission(s, action)) { return -EACCES; } @@ -786,7 +901,8 @@ bool RGWOp::generate_cors_headers(string& origin, string& method, string& header int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy * const bucket_policy, + RGWAccessControlPolicy * const bucket_acl, + const optional<Policy>& bucket_policy, const off_t start_ofs, const off_t end_ofs) { @@ -817,7 +933,6 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket, read_op.conds.if_match = ent.meta.etag.c_str(); read_op.params.attrs = &attrs; read_op.params.obj_size = &obj_size; - read_op.params.perr = &s->err; op_ret = read_op.prepare(); if (op_ret < 0) @@ -862,8 +977,8 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket, ldout(s->cct, 2) << "overriding permissions due to system operation" << dendl; } else if (s->auth.identity->is_admin_of(s->user->user_id)) { ldout(s->cct, 2) << "overriding permissions due to admin operation" << dendl; - } else if (!verify_object_permission(s, s->user_acl.get(), bucket_policy, - &obj_policy, RGW_PERM_READ)) { + } else if (!verify_object_permission(s, part, s->user_acl.get(), bucket_acl, + &obj_policy, bucket_policy, action)) { return -EPERM; } @@ -885,13 +1000,15 @@ static int 
iterate_user_manifest_parts(CephContext * const cct, const off_t end, RGWBucketInfo *pbucket_info, const string& obj_prefix, - RGWAccessControlPolicy * const bucket_policy, + RGWAccessControlPolicy * const bucket_acl, + const optional<Policy>& bucket_policy, uint64_t * const ptotal_len, uint64_t * const pobj_size, string * const pobj_sum, int (*cb)(rgw_bucket& bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy * const bucket_policy, + RGWAccessControlPolicy * const bucket_acl, + const optional<Policy>& bucket_policy, off_t start_ofs, off_t end_ofs, void *param), @@ -947,7 +1064,7 @@ static int iterate_user_manifest_parts(CephContext * const cct, len_count += end_ofs - start_ofs; if (cb) { - r = cb(bucket, ent, bucket_policy, start_ofs, end_ofs, cb_param); + r = cb(bucket, ent, bucket_acl, bucket_policy, start_ofs, end_ofs, cb_param); if (r < 0) { return r; } @@ -973,13 +1090,12 @@ static int iterate_user_manifest_parts(CephContext * const cct, } struct rgw_slo_part { - RGWAccessControlPolicy *bucket_policy; + RGWAccessControlPolicy *bucket_acl = nullptr; + Policy* bucket_policy = nullptr; rgw_bucket bucket; string obj_name; - uint64_t size; + uint64_t size = 0; string etag; - - rgw_slo_part() : bucket_policy(NULL), size(0) {} }; static int iterate_slo_parts(CephContext *cct, @@ -989,7 +1105,8 @@ static int iterate_slo_parts(CephContext *cct, map<uint64_t, rgw_slo_part>& slo_parts, int (*cb)(rgw_bucket& bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy *bucket_policy, + RGWAccessControlPolicy *bucket_acl, + const optional<Policy>& bucket_policy, off_t start_ofs, off_t end_ofs, void *param), @@ -1038,8 +1155,12 @@ static int iterate_slo_parts(CephContext *cct, if (found_start) { if (cb) { - int r = cb(part.bucket, ent, part.bucket_policy, start_ofs, end_ofs, cb_param); - if (r < 0) + // SLO is a Swift thing, and Swift has no knowledge of S3 Policies. + int r = cb(part.bucket, ent, part.bucket_acl, + (part.bucket_policy ? 
+ optional<Policy>(*part.bucket_policy) : none), + start_ofs, end_ofs, cb_param); + if (r < 0) return r; } } @@ -1052,13 +1173,14 @@ static int iterate_slo_parts(CephContext *cct, static int get_obj_user_manifest_iterate_cb(rgw_bucket& bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy * const bucket_policy, + RGWAccessControlPolicy * const bucket_acl, + const optional<Policy>& bucket_policy, const off_t start_ofs, const off_t end_ofs, void * const param) { RGWGetObj *op = static_cast<RGWGetObj *>(param); - return op->read_user_manifest_part(bucket, ent, bucket_policy, start_ofs, end_ofs); + return op->read_user_manifest_part(bucket, ent, bucket_acl, bucket_policy, start_ofs, end_ofs); } int RGWGetObj::handle_user_manifest(const char *prefix) @@ -1080,8 +1202,10 @@ int RGWGetObj::handle_user_manifest(const char *prefix) rgw_bucket bucket; - RGWAccessControlPolicy _bucket_policy(s->cct); - RGWAccessControlPolicy *bucket_policy; + RGWAccessControlPolicy _bucket_acl(s->cct); + RGWAccessControlPolicy *bucket_acl; + optional<Policy> _bucket_policy; + optional<Policy>* bucket_policy; RGWBucketInfo bucket_info; RGWBucketInfo *pbucket_info; @@ -1098,16 +1222,20 @@ int RGWGetObj::handle_user_manifest(const char *prefix) } bucket = bucket_info.bucket; pbucket_info = &bucket_info; - bucket_policy = &_bucket_policy; - r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_policy, bucket); + bucket_acl = &_bucket_acl; + r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_acl, bucket); if (r < 0) { ldout(s->cct, 0) << "failed to read bucket policy" << dendl; return r; } + _bucket_policy = get_iam_policy_from_attr(s->cct, store, bucket_attrs, + bucket_info.bucket.tenant); + bucket_policy = &_bucket_policy; } else { bucket = s->bucket; pbucket_info = &s->bucket_info; - bucket_policy = s->bucket_acl; + bucket_acl = s->bucket_acl; + bucket_policy = &s->iam_policy; } /* dry run to find out: @@ -1115,7 +1243,7 @@ int 
RGWGetObj::handle_user_manifest(const char *prefix) * - overall DLO's content size, * - md5 sum of overall DLO's content (for etag of Swift API). */ int r = iterate_user_manifest_parts(s->cct, store, ofs, end, - pbucket_info, obj_prefix, bucket_policy, + pbucket_info, obj_prefix, bucket_acl, *bucket_policy, &total_len, &s->obj_size, &lo_etag, nullptr /* cb */, nullptr /* cb arg */); if (r < 0) { @@ -1129,7 +1257,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix) } r = iterate_user_manifest_parts(s->cct, store, ofs, end, - pbucket_info, obj_prefix, bucket_policy, + pbucket_info, obj_prefix, bucket_acl, *bucket_policy, nullptr, nullptr, nullptr, get_obj_user_manifest_iterate_cb, (void *)this); if (r < 0) { @@ -1156,8 +1284,8 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl) } ldout(s->cct, 2) << "RGWGetObj::handle_slo_manifest()" << dendl; - list<RGWAccessControlPolicy> allocated_policies; - map<string, RGWAccessControlPolicy *> policies; + vector<RGWAccessControlPolicy> allocated_acls; + map<string, pair<RGWAccessControlPolicy *, optional<Policy>>> policies; map<string, rgw_bucket> buckets; map<uint64_t, rgw_slo_part> slo_parts; @@ -1189,16 +1317,18 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl) string obj_name = path.substr(pos_sep + 1); rgw_bucket bucket; - RGWAccessControlPolicy *bucket_policy; + RGWAccessControlPolicy *bucket_acl; + Policy* bucket_policy; if (bucket_name.compare(s->bucket.name) != 0) { const auto& piter = policies.find(bucket_name); if (piter != policies.end()) { - bucket_policy = piter->second; - bucket = buckets[bucket_name]; + bucket_acl = piter->second.first; + bucket_policy = piter->second.second.get_ptr(); + bucket = buckets[bucket_name]; } else { - allocated_policies.push_back(RGWAccessControlPolicy(s->cct)); - RGWAccessControlPolicy& _bucket_policy = allocated_policies.back(); + allocated_acls.push_back(RGWAccessControlPolicy(s->cct)); + RGWAccessControlPolicy& _bucket_acl = allocated_acls.back(); RGWBucketInfo 
bucket_info; map<string, bufferlist> bucket_attrs; @@ -1212,23 +1342,28 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl) return r; } bucket = bucket_info.bucket; - bucket_policy = &_bucket_policy; - r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_policy, + bucket_acl = &_bucket_acl; + r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_acl, bucket); if (r < 0) { - ldout(s->cct, 0) << "failed to read bucket policy for bucket " + ldout(s->cct, 0) << "failed to read bucket ACL for bucket " << bucket << dendl; return r; - } - buckets[bucket_name] = bucket; - policies[bucket_name] = bucket_policy; + } + auto _bucket_policy = get_iam_policy_from_attr( + s->cct, store, bucket_attrs, bucket_info.bucket.tenant); + bucket_policy = _bucket_policy.get_ptr(); + buckets[bucket_name] = bucket; + policies[bucket_name] = make_pair(bucket_acl, _bucket_policy); } } else { bucket = s->bucket; - bucket_policy = s->bucket_acl; + bucket_acl = s->bucket_acl; + bucket_policy = s->iam_policy.get_ptr(); } rgw_slo_part part; + part.bucket_acl = bucket_acl; part.bucket_policy = bucket_policy; part.bucket = bucket; part.obj_name = obj_name; @@ -1377,7 +1512,6 @@ void RGWGetObj::execute() read_op.params.attrs = &attrs; read_op.params.lastmod = &lastmod; read_op.params.obj_size = &s->obj_size; - read_op.params.perr = &s->err; op_ret = read_op.prepare(); if (op_ret < 0) @@ -1871,7 +2005,8 @@ void RGWDeleteBucketWebsite::execute() int RGWStatBucket::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_READ)) { + // This (a HEAD request on a bucket) is governed by the s3:ListBucket permission. + if (!verify_bucket_permission(s, rgw::IAM::s3ListBucket)) { return -EACCES; } @@ -1910,7 +2045,15 @@ void RGWStatBucket::execute() int RGWListBucket::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_READ)) { + op_ret = get_params(); + if (op_ret < 0) { + return op_ret; + } + + if (!verify_bucket_permission(s, + list_versions ? 
+ rgw::IAM::s3ListBucketVersions : + rgw::IAM::s3ListBucket)) { return -EACCES; } @@ -1948,10 +2091,6 @@ void RGWListBucket::execute() return; } - op_ret = get_params(); - if (op_ret < 0) - return; - if (need_container_stats()) { map<string, RGWBucketEnt> m; m[s->bucket.name] = RGWBucketEnt(); @@ -2043,7 +2182,7 @@ int RGWCreateBucket::verify_permission() static int forward_request_to_master(struct req_state *s, obj_version *objv, RGWRados *store, bufferlist& in_data, - JSONParser *jp) + JSONParser *jp, req_info *forward_info) { if (!store->rest_master_conn) { ldout(s->cct, 0) << "rest connection is invalid" << dendl; @@ -2053,9 +2192,8 @@ static int forward_request_to_master(struct req_state *s, obj_version *objv, bufferlist response; string uid_str = s->user->user_id.to_str(); #define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response - int ret = store->rest_master_conn->forward(uid_str, s->info, objv, - MAX_REST_RESPONSE, &in_data, - &response); + int ret = store->rest_master_conn->forward(uid_str, (forward_info ? 
*forward_info : s->info), + objv, MAX_REST_RESPONSE, &in_data, &response); if (ret < 0) return ret; @@ -2327,7 +2465,7 @@ void RGWCreateBucket::execute() bucket.name = s->bucket_name; op_ret = store->select_bucket_placement(*(s->user), zonegroup_id, placement_rule, - bucket, &selected_placement_rule, nullptr); + &selected_placement_rule, nullptr); if (selected_placement_rule != s->bucket_info.placement_rule) { op_ret = -EEXIST; return; @@ -2475,7 +2613,7 @@ void RGWCreateBucket::execute() int RGWDeleteBucket::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) { + if (!verify_bucket_permission(s, rgw::IAM::s3DeleteBucket)) { return -EACCES; } @@ -2572,7 +2710,8 @@ int RGWPutObj::verify_permission() { if (copy_source) { - RGWAccessControlPolicy cs_policy(s->cct); + RGWAccessControlPolicy cs_acl(s->cct); + optional<Policy> policy; map<string, bufferlist> cs_attrs; rgw_bucket cs_bucket(copy_source_bucket_info.bucket); rgw_obj_key cs_object(copy_source_object_name, copy_source_version_id); @@ -2582,19 +2721,45 @@ int RGWPutObj::verify_permission() store->set_prefetch_data(s->obj_ctx, obj); /* check source object permissions */ - if (read_obj_policy(store, s, copy_source_bucket_info, cs_attrs, &cs_policy, cs_bucket, cs_object) < 0) { + if (read_obj_policy(store, s, copy_source_bucket_info, cs_attrs, &cs_acl, policy, + cs_bucket, cs_object) < 0) { return -EACCES; } /* admin request overrides permission checks */ - if (! s->auth.identity->is_admin_of(cs_policy.get_owner().get_id()) && - ! cs_policy.verify_permission(*s->auth.identity, s->perm_mask, RGW_PERM_READ)) { - return -EACCES; + if (! s->auth.identity->is_admin_of(cs_acl.get_owner().get_id())) { + if (policy) { + auto e = policy->eval(s->env, *s->auth.identity, + cs_object.instance.empty() ? 
+ rgw::IAM::s3GetObject : + rgw::IAM::s3GetObjectVersion, + rgw::IAM::ARN(obj)); + if (e == Effect::Deny) { + return -EACCES; + } else if (e == Effect::Pass && + !cs_acl.verify_permission(*s->auth.identity, s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } + } else if (!cs_acl.verify_permission(*s->auth.identity, s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } } + } + if (s->iam_policy) { + auto e = s->iam_policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3PutObject, + rgw_obj(s->bucket, s->object)); + if (e == Effect::Allow) { + return 0; + } else if (e == Effect::Deny) { + return -EACCES; + } } - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) { + if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { return -EACCES; } @@ -2667,7 +2832,7 @@ int RGWPutObjProcessor_Multipart::do_complete(size_t accounted_size, map<string, bufferlist>& attrs, real_time delete_at, const char *if_match, - const char *if_nomatch) + const char *if_nomatch, const string *user_data) { complete_writing_data(); @@ -3027,7 +3192,7 @@ void RGWPutObj::execute() } bufferlist &data = data_in; - if (s->aws4_auth_streaming_mode) { + if (len && s->aws4_auth_streaming_mode) { /* use unwrapped data */ data = s->aws4_auth->bl; len = data.length(); @@ -3221,7 +3386,8 @@ void RGWPutObj::execute() } op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs, - (delete_at ? *delete_at : real_time()), if_match, if_nomatch); + (delete_at ? *delete_at : real_time()), if_match, if_nomatch, + (user_data.empty() ? 
nullptr : &user_data)); /* produce torrent */ if (s->cct->_conf->rgw_torrent_flag && (ofs == torrent.get_data_len())) @@ -3271,16 +3437,10 @@ void RGWPostObj::pre_exec() void RGWPostObj::execute() { RGWPutObjDataProcessor *filter = nullptr; - std::unique_ptr<RGWPutObjDataProcessor> encrypt; - char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; - unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; - MD5 hash; - buffer::list bl, aclbl; - int len = 0; boost::optional<RGWPutObj_Compress> compressor; CompressorRef plugin; - // read in the data from the POST form + /* Read in the data from the POST form. */ op_ret = get_params(); if (op_ret < 0) { return; @@ -3291,121 +3451,151 @@ void RGWPostObj::execute() return; } - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) { + if (s->iam_policy) { + auto e = s->iam_policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3PutObject, + rgw_obj(s->bucket, s->object)); + if (e == Effect::Deny) { + op_ret = -EACCES; + return; + } else if (e == Effect::Pass && !verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { + op_ret = -EACCES; + return; + } + } else if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { op_ret = -EACCES; return; } - op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket, - user_quota, bucket_quota, s->content_length); - if (op_ret < 0) { - return; - } - - RGWPutObjProcessor_Atomic processor(*static_cast<RGWObjectCtx *>(s->obj_ctx), - s->bucket_info, - s->bucket, - s->object.name, - /* part size */ - s->cct->_conf->rgw_obj_stripe_size, - s->req_id, - s->bucket_info.versioning_enabled()); + /* Start iteration over data fields. It's necessary as Swift's FormPost + * is capable to handle multiple files in single form. 
*/ + do { + std::unique_ptr<RGWPutObjDataProcessor> encrypt; + char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; + MD5 hash; + ceph::buffer::list bl, aclbl; + int len = 0; + + op_ret = store->check_quota(s->bucket_owner.get_id(), + s->bucket, + user_quota, + bucket_quota, + s->content_length); + if (op_ret < 0) { + return; + } - // no filters by default - filter = &processor; + RGWPutObjProcessor_Atomic processor(*static_cast<RGWObjectCtx *>(s->obj_ctx), + s->bucket_info, + s->bucket, + get_current_filename(), + /* part size */ + s->cct->_conf->rgw_obj_stripe_size, + s->req_id, + s->bucket_info.versioning_enabled()); + /* No filters by default. */ + filter = &processor; - op_ret = processor.prepare(store, nullptr); - if (op_ret < 0) - return; + op_ret = processor.prepare(store, nullptr); + if (op_ret < 0) { + return; + } - op_ret = get_encrypt_filter(&encrypt, filter); - if (op_ret < 0) { - return; - } - if (encrypt != nullptr) { - filter = encrypt.get(); - } else { - const auto& compression_type = store->get_zone_params().get_compression_type( - s->bucket_info.placement_rule); - if (compression_type != "none") { - plugin = Compressor::create(s->cct, compression_type); - if (!plugin) { - ldout(s->cct, 1) << "Cannot load plugin for compression type " - << compression_type << dendl; - } else { - compressor.emplace(s->cct, plugin, filter); - filter = &*compressor; + op_ret = get_encrypt_filter(&encrypt, filter); + if (op_ret < 0) { + return; + } + if (encrypt != nullptr) { + filter = encrypt.get(); + } else { + const auto& compression_type = store->get_zone_params().get_compression_type( + s->bucket_info.placement_rule); + if (compression_type != "none") { + plugin = Compressor::create(s->cct, compression_type); + if (!plugin) { + ldout(s->cct, 1) << "Cannot load plugin for compression type " + << compression_type << dendl; + } else { + compressor.emplace(s->cct, plugin, filter); + filter = &*compressor; + } } } - } - 
while (data_pending) { - bufferlist data; - len = get_data(data); + bool again; + do { + ceph::bufferlist data; + len = get_data(data, again); - if (len < 0) { - op_ret = len; - return; - } + if (len < 0) { + op_ret = len; + return; + } - if (!len) - break; + if (!len) { + break; + } - hash.Update((const byte *)data.c_str(), data.length()); - op_ret = put_data_and_throttle(filter, data, ofs, false); + hash.Update((const byte *)data.c_str(), data.length()); + op_ret = put_data_and_throttle(filter, data, ofs, false); - ofs += len; + ofs += len; - if (ofs > max_len) { - op_ret = -ERR_TOO_LARGE; - return; - } - } - { - bufferlist flush; - op_ret = put_data_and_throttle(filter, flush, ofs, false); - } - if (len < min_len) { - op_ret = -ERR_TOO_SMALL; - return; - } + if (ofs > max_len) { + op_ret = -ERR_TOO_LARGE; + return; + } + } while (again); - s->obj_size = ofs; + { + bufferlist flush; + op_ret = put_data_and_throttle(filter, flush, ofs, false); + } - op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket, - user_quota, bucket_quota, s->obj_size); - if (op_ret < 0) { - return; - } + if (len < min_len) { + op_ret = -ERR_TOO_SMALL; + return; + } - hash.Final(m); - buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); + s->obj_size = ofs; - etag = calc_md5; - bl.append(etag.c_str(), etag.size() + 1); - emplace_attr(RGW_ATTR_ETAG, std::move(bl)); + op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket, + user_quota, bucket_quota, s->obj_size); + if (op_ret < 0) { + return; + } - policy.encode(aclbl); - emplace_attr(RGW_ATTR_ACL, std::move(aclbl)); + hash.Final(m); + buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); - if (content_type.size()) { - bufferlist ct_bl; - ct_bl.append(content_type.c_str(), content_type.size() + 1); - emplace_attr(RGW_ATTR_CONTENT_TYPE, std::move(ct_bl)); - } + etag = calc_md5; + bl.append(etag.c_str(), etag.size() + 1); + emplace_attr(RGW_ATTR_ETAG, std::move(bl)); - if (compressor && compressor->is_compressed()) { - 
bufferlist tmp; - RGWCompressionInfo cs_info; - cs_info.compression_type = plugin->get_type_name(); - cs_info.orig_size = s->obj_size; - cs_info.blocks = move(compressor->get_compression_blocks()); - ::encode(cs_info, tmp); - emplace_attr(RGW_ATTR_COMPRESSION, std::move(tmp)); - } + policy.encode(aclbl); + emplace_attr(RGW_ATTR_ACL, std::move(aclbl)); + + const std::string content_type = get_current_content_type(); + if (! content_type.empty()) { + ceph::bufferlist ct_bl; + ct_bl.append(content_type.c_str(), content_type.size() + 1); + emplace_attr(RGW_ATTR_CONTENT_TYPE, std::move(ct_bl)); + } + + if (compressor && compressor->is_compressed()) { + ceph::bufferlist tmp; + RGWCompressionInfo cs_info; + cs_info.compression_type = plugin->get_type_name(); + cs_info.orig_size = s->obj_size; + cs_info.blocks = move(compressor->get_compression_blocks()); + ::encode(cs_info, tmp); + emplace_attr(RGW_ATTR_COMPRESSION, std::move(tmp)); + } - op_ret = processor.complete(s->obj_size, etag, NULL, real_time(), attrs, - (delete_at ? *delete_at : real_time())); + op_ret = processor.complete(s->obj_size, etag, nullptr, real_time(), + attrs, (delete_at ? *delete_at : real_time())); + } while (is_next_file_to_upload()); } @@ -3538,7 +3728,7 @@ void RGWPutMetadataAccount::execute() int RGWPutMetadataBucket::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) { + if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { return -EACCES; } @@ -3569,6 +3759,12 @@ void RGWPutMetadataBucket::execute() * the hood. This method will add the new items only if the map doesn't * contain such keys yet. 
*/ if (has_policy) { + if (s->dialect.compare("swift") == 0) { + auto old_policy = static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl); + auto new_policy = static_cast<RGWAccessControlPolicy_SWIFT*>(&policy); + new_policy->filter_merge(policy_rw_mask, old_policy); + policy = *new_policy; + } buffer::list bl; policy.encode(bl); emplace_attr(RGW_ATTR_ACL, std::move(bl)); @@ -3611,7 +3807,9 @@ void RGWPutMetadataBucket::execute() int RGWPutMetadataObject::verify_permission() { - if (!verify_object_permission(s, RGW_PERM_WRITE)) { + // This looks to be something specific to Swift. We could add + // operations like swift:PutMetadataObject to the Policy Engine. + if (!verify_object_permission_no_policy(s, RGW_PERM_WRITE)) { return -EACCES; } @@ -3722,7 +3920,19 @@ int RGWDeleteObj::handle_slo_manifest(bufferlist& bl) int RGWDeleteObj::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) { + if (s->iam_policy) { + auto r = s->iam_policy->eval(s->env, *s->auth.identity, + s->object.instance.empty() ? 
+ rgw::IAM::s3DeleteObject : + rgw::IAM::s3DeleteObjectVersion, + ARN(s->bucket, s->object.name)); + if (r == Effect::Allow) + return true; + else if (r == Effect::Deny) + return false; + } + + if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { return -EACCES; } @@ -3872,7 +4082,8 @@ bool RGWCopyObj::parse_copy_location(const string& url_src, string& bucket_name, int RGWCopyObj::verify_permission() { - RGWAccessControlPolicy src_policy(s->cct); + RGWAccessControlPolicy src_acl(s->cct); + optional<Policy> src_policy; op_ret = get_params(); if (op_ret < 0) return op_ret; @@ -3907,17 +4118,32 @@ int RGWCopyObj::verify_permission() store->set_prefetch_data(s->obj_ctx, src_obj); /* check source object permissions */ - op_ret = read_obj_policy(store, s, src_bucket_info, src_attrs, &src_policy, - src_bucket, src_object); + op_ret = read_obj_policy(store, s, src_bucket_info, src_attrs, &src_acl, + src_policy, src_bucket, src_object); if (op_ret < 0) { return op_ret; } /* admin request overrides permission checks */ - if (! s->auth.identity->is_admin_of(src_policy.get_owner().get_id()) && - ! src_policy.verify_permission(*s->auth.identity, s->perm_mask, - RGW_PERM_READ)) { - return -EACCES; + if (!s->auth.identity->is_admin_of(src_acl.get_owner().get_id())) { + if (src_policy) { + auto e = src_policy->eval(s->env, *s->auth.identity, + src_object.instance.empty() ? + rgw::IAM::s3GetObject : + rgw::IAM::s3GetObjectVersion, + ARN(src_obj)); + if (e == Effect::Deny) { + return -EACCES; + } else if (e == Effect::Pass && + !src_acl.verify_permission(*s->auth.identity, s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } + } else if (!src_acl.verify_permission(*s->auth.identity, + s->perm_mask, + RGW_PERM_READ)) { + return -EACCES; + } } } @@ -4070,7 +4296,6 @@ void RGWCopyObj::execute() (version_id.empty() ? 
NULL : &version_id), &s->req_id, /* use req_id as tag */ &etag, - &s->err, copy_obj_progress_cb, (void *)this ); } @@ -4079,9 +4304,12 @@ int RGWGetACLs::verify_permission() { bool perm; if (!s->object.empty()) { - perm = verify_object_permission(s, RGW_PERM_READ_ACP); + perm = verify_object_permission(s, + s->object.instance.empty() ? + rgw::IAM::s3GetObjectAcl : + rgw::IAM::s3GetObjectVersionAcl); } else { - perm = verify_bucket_permission(s, RGW_PERM_READ_ACP); + perm = verify_bucket_permission(s, rgw::IAM::s3GetObjectAcl); } if (!perm) return -EACCES; @@ -4109,9 +4337,12 @@ int RGWPutACLs::verify_permission() { bool perm; if (!s->object.empty()) { - perm = verify_object_permission(s, RGW_PERM_WRITE_ACP); + perm = verify_object_permission(s, + s->object.instance.empty() ? + rgw::IAM::s3PutObjectAcl : + rgw::IAM::s3PutObjectVersionAcl); } else { - perm = verify_bucket_permission(s, RGW_PERM_WRITE_ACP); + perm = verify_bucket_permission(s, rgw::IAM::s3PutBucketAcl); } if (!perm) return -EACCES; @@ -4122,7 +4353,7 @@ int RGWPutACLs::verify_permission() int RGWGetLC::verify_permission() { bool perm; - perm = verify_bucket_permission(s, RGW_PERM_READ_ACP); + perm = verify_bucket_permission(s, rgw::IAM::s3GetLifecycleConfiguration); if (!perm) return -EACCES; @@ -4132,7 +4363,7 @@ int RGWGetLC::verify_permission() int RGWPutLC::verify_permission() { bool perm; - perm = verify_bucket_permission(s, RGW_PERM_WRITE_ACP); + perm = verify_bucket_permission(s, rgw::IAM::s3PutLifecycleConfiguration); if (!perm) return -EACCES; @@ -4142,7 +4373,7 @@ int RGWPutLC::verify_permission() int RGWDeleteLC::verify_permission() { bool perm; - perm = verify_bucket_permission(s, RGW_PERM_WRITE_ACP); + perm = verify_bucket_permission(s, rgw::IAM::s3PutLifecycleConfiguration); if (!perm) return -EACCES; @@ -4608,8 +4839,20 @@ void RGWSetRequestPayment::execute() int RGWInitMultipart::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) + if (s->iam_policy) { + auto e = 
s->iam_policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3PutObject, + rgw_obj(s->bucket, s->object)); + if (e == Effect::Allow) { + return 0; + } else if (e == Effect::Deny) { + return -EACCES; + } + } + + if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { return -EACCES; + } return 0; } @@ -4713,8 +4956,20 @@ static int get_multipart_info(RGWRados *store, struct req_state *s, int RGWCompleteMultipart::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) + if (s->iam_policy) { + auto e = s->iam_policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3PutObject, + rgw_obj(s->bucket, s->object)); + if (e == Effect::Allow) { + return 0; + } else if (e == Effect::Deny) { + return -EACCES; + } + } + + if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { return -EACCES; + } return 0; } @@ -4959,8 +5214,20 @@ void RGWCompleteMultipart::execute() int RGWAbortMultipart::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_WRITE)) + if (s->iam_policy) { + auto e = s->iam_policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3AbortMultipartUpload, + rgw_obj(s->bucket, s->object)); + if (e == Effect::Allow) { + return 0; + } else if (e == Effect::Deny) { + return -EACCES; + } + } + + if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) { return -EACCES; + } return 0; } @@ -4996,7 +5263,7 @@ void RGWAbortMultipart::execute() int RGWListMultipart::verify_permission() { - if (!verify_object_permission(s, RGW_PERM_READ)) + if (!verify_object_permission(s, rgw::IAM::s3ListMultipartUploadParts)) return -EACCES; return 0; @@ -5030,7 +5297,8 @@ void RGWListMultipart::execute() int RGWListBucketMultiparts::verify_permission() { - if (!verify_bucket_permission(s, RGW_PERM_READ)) + if (!verify_bucket_permission(s, + rgw::IAM::s3ListBucketMultiPartUploads)) return -EACCES; return 0; @@ -5102,7 +5370,8 @@ void RGWGetHealthCheck::execute() int RGWDeleteMultiObj::verify_permission() { - if (!verify_bucket_permission(s, 
RGW_PERM_WRITE)) + acl_allowed = verify_bucket_permission_no_policy(s, RGW_PERM_WRITE); + if (!acl_allowed && !s->iam_policy) return -EACCES; return 0; @@ -5159,6 +5428,19 @@ void RGWDeleteMultiObj::execute() iter != multi_delete->objects.end() && num_processed < max_to_delete; ++iter, num_processed++) { rgw_obj obj(bucket, *iter); + if (s->iam_policy) { + auto e = s->iam_policy->eval(s->env, + *s->auth.identity, + iter->instance.empty() ? + rgw::IAM::s3DeleteObject : + rgw::IAM::s3DeleteObjectVersion, + obj); + if ((e == Effect::Deny) || + (e == Effect::Pass && !acl_allowed)) { + send_partial_response(*iter, false, "", -EACCES); + continue; + } + } obj_ctx->obj.set_atomic(obj); @@ -5205,11 +5487,14 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo, return false; } + auto policy = get_iam_policy_from_attr(s->cct, store, battrs, binfo.bucket.tenant); + bucket_owner = bacl.get_owner(); /* We can use global user_acl because each BulkDelete request is allowed * to work on entities from a single account only. */ - return verify_bucket_permission(s, s->user_acl.get(), &bacl, RGW_PERM_WRITE); + return verify_bucket_permission(s, binfo.bucket, s->user_acl.get(), + &bacl, policy, rgw::IAM::s3DeleteBucket); } bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path) @@ -5422,7 +5707,28 @@ RGWBulkUploadOp::parse_path(const boost::string_ref& path) } } - return boost::none; + return none; +} + +std::pair<std::string, std::string> +RGWBulkUploadOp::handle_upload_path(struct req_state *s) +{ + std::string bucket_path, file_prefix; + if (! s->init_state.url_bucket.empty()) { + file_prefix = bucket_path = s->init_state.url_bucket + "/"; + if (! s->object.empty()) { + std::string& object_name = s->object.name; + + /* As rgw_obj_key::empty() already verified emptiness of s->object.name, + * we can safely examine its last element. 
*/ + if (object_name.back() == '/') { + file_prefix.append(object_name); + } else { + file_prefix.append(object_name).append("/"); + } + } + } + return std::make_pair(bucket_path, file_prefix); } int RGWBulkUploadOp::handle_dir_verify_permission() @@ -5446,6 +5752,20 @@ int RGWBulkUploadOp::handle_dir_verify_permission() return 0; } +static void forward_req_info(CephContext *cct, req_info& info, const std::string& bucket_name) +{ + /* the request of container or object level will contain bucket name. + * only at account level need to append the bucket name */ + if (info.script_uri.find(bucket_name) != std::string::npos) { + return; + } + + ldout(cct, 20) << "append the bucket: "<< bucket_name << " to req_info" << dendl; + info.script_uri.append("/").append(bucket_name); + info.request_uri_aws4 = info.request_uri = info.script_uri; + info.effective_uri = "/" + bucket_name; +} + int RGWBulkUploadOp::handle_dir(const boost::string_ref path) { ldout(s->cct, 20) << "bulk upload: got directory=" << path << dendl; @@ -5462,14 +5782,6 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path) rgw_raw_obj obj(store->get_zone_params().domain_root, rgw_make_bucket_entry_name(s->bucket_tenant, bucket_name)); - /* Swift API doesn't support location constraint. We're just checking here - * whether creation is taking place in the master zone or not. */ - if (! store->get_zonegroup().is_master) { - ldout(s->cct, 0) << "creating bucket in a non-master zone." << dendl; - op_ret = -EINVAL; - return op_ret; - } - /* we need to make sure we read bucket info, it's not read before for this * specific request */ RGWBucketInfo binfo; @@ -5502,7 +5814,9 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path) if (! 
store->is_meta_master()) { JSONParser jp; ceph::bufferlist in_data; - op_ret = forward_request_to_master(s, nullptr, store, in_data, &jp); + req_info info = s->info; + forward_req_info(s->cct, info, bucket_name); + op_ret = forward_request_to_master(s, nullptr, store, in_data, &jp, &info); if (op_ret < 0) { return op_ret; } @@ -5535,7 +5849,6 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path) op_ret = store->select_bucket_placement(*(s->user), store->get_zonegroup().get_id(), placement_rule, - bucket, &selected_placement_rule, nullptr); if (selected_placement_rule != binfo.placement_rule) { @@ -5615,6 +5928,7 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path) bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo, + const rgw_obj& obj, std::map<std::string, ceph::bufferlist>& battrs, ACLOwner& bucket_owner /* out */) { @@ -5626,8 +5940,21 @@ bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo, return false; } + auto policy = get_iam_policy_from_attr(s->cct, store, battrs, binfo.bucket.tenant); + bucket_owner = bacl.get_owner(); - return verify_bucket_permission(s, s->user_acl.get(), &bacl, RGW_PERM_WRITE); + if (policy) { + auto e = policy->eval(s->env, *s->auth.identity, + rgw::IAM::s3PutObject, obj); + if (e == Effect::Allow) { + return true; + } else if (e == Effect::Deny) { + return false; + } + } + + return verify_bucket_permission_no_policy(s, s->user_acl.get(), + &bacl, RGW_PERM_WRITE); } int RGWBulkUploadOp::handle_file(const boost::string_ref path, @@ -5663,7 +5990,9 @@ int RGWBulkUploadOp::handle_file(const boost::string_ref path, return op_ret; } - if (! handle_file_verify_permission(binfo, battrs, bowner)) { + if (! 
handle_file_verify_permission(binfo, + rgw_obj(binfo.bucket, object), + battrs, bowner)) { ldout(s->cct, 20) << "bulk upload: object creation unauthorized" << dendl; op_ret = -EACCES; return op_ret; @@ -5802,6 +6131,11 @@ void RGWBulkUploadOp::execute() return; } + /* Handling the $UPLOAD_PATH accordingly to the Swift's Bulk middleware. See: + * https://github.com/openstack/swift/blob/2.13.0/swift/common/middleware/bulk.py#L31-L41 */ + std::string bucket_path, file_prefix; + std::tie(bucket_path, file_prefix) = handle_upload_path(s); + auto status = rgw::tar::StatusIndicator::create(); do { op_ret = stream->get_exactly(rgw::tar::BLOCK_SIZE, buffer); @@ -5826,25 +6160,28 @@ void RGWBulkUploadOp::execute() case rgw::tar::FileType::NORMAL_FILE: { ldout(s->cct, 2) << "bulk upload: handling regular file" << dendl; + boost::string_ref filename = bucket_path.empty() ? header->get_filename() : \ + file_prefix + header->get_filename().to_string(); auto body = AlignedStreamGetter(0, header->get_filesize(), rgw::tar::BLOCK_SIZE, *stream); - op_ret = handle_file(header->get_filename(), + op_ret = handle_file(filename, header->get_filesize(), body); if (! op_ret) { /* Only regular files counts. */ num_created++; } else { - failures.emplace_back(op_ret, header->get_filename().to_string()); + failures.emplace_back(op_ret, filename.to_string()); } break; } case rgw::tar::FileType::DIRECTORY: { ldout(s->cct, 2) << "bulk upload: handling regular directory" << dendl; - op_ret = handle_dir(header->get_filename()); + boost::string_ref dirname = bucket_path.empty() ? 
header->get_filename() : bucket_path; + op_ret = handle_dir(dirname); if (op_ret < 0 && op_ret != -ERR_BUCKET_EXISTS) { - failures.emplace_back(op_ret, header->get_filename().to_string()); + failures.emplace_back(op_ret, dirname.to_string()); } break; } @@ -5905,11 +6242,13 @@ ssize_t RGWBulkUploadOp::AlignedStreamGetter::get_exactly(const size_t want, int RGWSetAttrs::verify_permission() { + // This looks to be part of the RGW-NFS machinery and has no S3 or + // Swift equivalent. bool perm; if (!s->object.empty()) { - perm = verify_object_permission(s, RGW_PERM_WRITE); + perm = verify_object_permission_no_policy(s, RGW_PERM_WRITE); } else { - perm = verify_bucket_permission(s, RGW_PERM_WRITE); + perm = verify_bucket_permission_no_policy(s, RGW_PERM_WRITE); } if (!perm) return -EACCES; @@ -5985,6 +6324,7 @@ int RGWHandler::init(RGWRados *_store, int RGWHandler::do_init_permissions() { int ret = rgw_build_bucket_policies(store, s); + s->env = rgw_build_iam_environment(store, s); if (ret < 0) { ldout(s->cct, 10) << "read_permissions on " << s->bucket << " ret=" << ret << dendl; @@ -6022,3 +6362,110 @@ int RGWHandler::error_handler(int err_no, string *error_content) { // This is the do-nothing error handler return err_no; } + + +void RGWPutBucketPolicy::send_response() +{ + if (op_ret) { + set_req_state_err(s, op_ret); + } + dump_errno(s); + end_header(s); +} + +int RGWPutBucketPolicy::verify_permission() +{ + if (!verify_bucket_permission(s, rgw::IAM::s3PutBucketPolicy)) { + return -EACCES; + } + + return 0; +} + +int RGWPutBucketPolicy::get_params() +{ + const auto max_size = s->cct->_conf->rgw_max_put_param_size; + // At some point when I have more time I want to make a version of + // rgw_rest_read_all_input that doesn't use malloc. + op_ret = rgw_rest_read_all_input(s, &data, &len, max_size, false); + // And throws exceptions. 
+ return op_ret; +} + +void RGWPutBucketPolicy::execute() +{ + op_ret = get_params(); + if (op_ret < 0) { + return; + } + + try { + Policy p(s->cct, s->bucket_tenant, + bufferlist::static_from_mem(data, len)); + auto attrs = s->bucket_attrs; + attrs[RGW_ATTR_IAM_POLICY].append(p.text); + op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, + &s->bucket_info.objv_tracker); + if (op_ret == -ECANCELED) { + op_ret = 0; /* lost a race, but it's ok because policies are immutable */ + } + } catch (rgw::IAM::PolicyParseException& e) { + op_ret = -EINVAL; + } +} + +void RGWGetBucketPolicy::send_response() +{ + if (op_ret) { + set_req_state_err(s, op_ret); + } + dump_errno(s); + end_header(s); + dump_start(s); + rgw_flush_formatter(s, s->formatter); + dump_body(s, policy); +} + +int RGWGetBucketPolicy::verify_permission() +{ + if (!verify_bucket_permission(s, rgw::IAM::s3GetBucketPolicy)) { + return -EACCES; + } + + return 0; +} + +void RGWGetBucketPolicy::execute() +{ + auto attrs = s->bucket_attrs; + policy = attrs[RGW_ATTR_IAM_POLICY]; +} + +void RGWDeleteBucketPolicy::send_response() +{ + if (op_ret) { + set_req_state_err(s, op_ret); + } + dump_errno(s); + end_header(s); +} + +int RGWDeleteBucketPolicy::verify_permission() +{ + if (!verify_bucket_permission(s, rgw::IAM::s3DeleteBucketPolicy)) { + return -EACCES; + } + + return 0; +} + +void RGWDeleteBucketPolicy::execute() +{ + auto attrs = s->bucket_attrs; + attrs.erase(RGW_ATTR_IAM_POLICY); + op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, + &s->bucket_info.objv_tracker); + if (op_ret == -ECANCELED) { + op_ret = 0; /* lost a race, but it's ok because policies are immutable */ + } +} diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 11c88a37fa2..1b4f02f1b7f 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -1,4 +1,4 @@ -// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab /** * 
All operations via the rados gateway are carried out by @@ -22,6 +22,7 @@ #include <boost/optional.hpp> #include <boost/utility/in_place_factory.hpp> +#include <boost/function.hpp> #include "common/armor.h" #include "common/mime.h" @@ -43,7 +44,6 @@ #include "include/assert.h" -using namespace std; using ceph::crypto::SHA1; struct req_state; @@ -93,6 +93,7 @@ public: virtual int authorize() = 0; virtual int postauth_init() = 0; virtual int error_handler(int err_no, std::string* error_content); + virtual void dump(const string& code, const string& message) const {} }; @@ -211,6 +212,7 @@ protected: bool first_data; uint64_t cur_ofs; bufferlist waiting; + uint64_t action = 0; int init_common(); public: @@ -249,11 +251,13 @@ public: int verify_permission() override; void pre_exec() override; void execute() override; - int read_user_manifest_part(rgw_bucket& bucket, - const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy *bucket_policy, - off_t start_ofs, - off_t end_ofs); + int read_user_manifest_part( + rgw_bucket& bucket, + const rgw_bucket_dir_entry& ent, + RGWAccessControlPolicy * const bucket_acl, + const boost::optional<rgw::IAM::Policy>& bucket_policy, + const off_t start_ofs, + const off_t end_ofs); int handle_user_manifest(const char *prefix); int handle_slo_manifest(bufferlist& bl); @@ -427,9 +431,13 @@ protected: boost::optional<std::pair<std::string, rgw_obj_key>> parse_path(const boost::string_ref& path); + + std::pair<std::string, std::string> + handle_upload_path(struct req_state *s); bool handle_file_verify_permission(RGWBucketInfo& binfo, - std::map<std::string, ceph::bufferlist>& battrs, + const rgw_obj& obj, + std::map<std::string, ceph::bufferlist>& battrs, ACLOwner& bucket_owner /* out */); int handle_file(boost::string_ref path, size_t size, @@ -926,6 +934,7 @@ protected: string version_id; bufferlist bl_aux; map<string, string> crypt_http_responses; + string user_data; boost::optional<ceph::real_time> delete_at; @@ -1013,21 +1022,23 @@ 
protected: const char *supplied_md5_b64; const char *supplied_etag; string etag; - string boundary; - bool data_pending; - string content_type; RGWAccessControlPolicy policy; map<string, bufferlist> attrs; boost::optional<ceph::real_time> delete_at; + /* Must be called after get_data() or the result is undefined. */ + virtual std::string get_current_filename() const = 0; + virtual std::string get_current_content_type() const = 0; + virtual bool is_next_file_to_upload() { + return false; + } public: RGWPostObj() : min_len(0), max_len(LLONG_MAX), len(0), ofs(0), supplied_md5_b64(nullptr), - supplied_etag(nullptr), - data_pending(false) { + supplied_etag(nullptr) { } void emplace_attr(std::string&& key, buffer::list&& bl) { @@ -1048,9 +1059,9 @@ public: return 0; } virtual int get_params() = 0; - virtual int get_data(bufferlist& bl) = 0; + virtual int get_data(ceph::bufferlist& bl, bool& again) = 0; void send_response() override = 0; - const string name() override { return "post_obj"; } + const std::string name() override { return "post_obj"; } RGWOpType get_type() override { return RGW_OP_POST_OBJ; } uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } }; @@ -1098,6 +1109,7 @@ protected: map<string, buffer::list> attrs; set<string> rmattr_names; bool has_policy, has_cors; + uint32_t policy_rw_mask; RGWAccessControlPolicy policy; RGWCORSConfiguration cors_config; string placement_rule; @@ -1105,7 +1117,7 @@ protected: public: RGWPutMetadataBucket() - : has_policy(false), has_cors(false) + : has_policy(false), has_cors(false), policy_rw_mask(0) {} void emplace_attr(std::string&& key, buffer::list&& bl) { @@ -1688,6 +1700,7 @@ protected: rgw_bucket bucket; bool quiet; bool status_dumped; + bool acl_allowed = false; public: RGWDeleteMultiObj() { @@ -1725,7 +1738,9 @@ public: extern int rgw_build_bucket_policies(RGWRados* store, struct req_state* s); extern int rgw_build_object_policies(RGWRados *store, struct req_state *s, - bool prefetch_data); + bool 
prefetch_data); +extern rgw::IAM::Environment rgw_build_iam_environment(RGWRados* store, + struct req_state* s); static inline int put_data_and_throttle(RGWPutObjDataProcessor *processor, bufferlist& data, off_t ofs, @@ -1935,6 +1950,66 @@ public: virtual uint32_t op_mask() { return RGW_OP_TYPE_READ; } }; +class RGWPutBucketPolicy : public RGWOp { + int len; + char *data = nullptr; +public: + RGWPutBucketPolicy() = default; + ~RGWPutBucketPolicy() { + if (data) { + free(static_cast<void*>(data)); + } + } + void send_response() override; + int verify_permission() override; + uint32_t op_mask() override { + return RGW_OP_TYPE_WRITE; + } + void execute() override; + int get_params(); + const std::string name() override { + return "put_bucket_policy"; + } + RGWOpType get_type() override { + return RGW_OP_PUT_BUCKET_POLICY; + } +}; + +class RGWGetBucketPolicy : public RGWOp { + buffer::list policy; +public: + RGWGetBucketPolicy() = default; + void send_response() override; + int verify_permission() override; + uint32_t op_mask() override { + return RGW_OP_TYPE_READ; + } + void execute() override; + const std::string name() override { + return "get_bucket_policy"; + } + RGWOpType get_type() override { + return RGW_OP_GET_BUCKET_POLICY; + } +}; + +class RGWDeleteBucketPolicy : public RGWOp { +public: + RGWDeleteBucketPolicy() = default; + void send_response() override; + int verify_permission() override; + uint32_t op_mask() override { + return RGW_OP_TYPE_WRITE; + } + void execute() override; + int get_params(); + const std::string name() override { + return "delete_bucket_policy"; + } + RGWOpType get_type() override { + return RGW_OP_DELETE_BUCKET_POLICY; + } +}; #endif /* CEPH_RGW_OP_H */ diff --git a/src/rgw/rgw_period_history.cc b/src/rgw/rgw_period_history.cc index eff0e78ad9d..895700f6897 100644 --- a/src/rgw/rgw_period_history.cc +++ b/src/rgw/rgw_period_history.cc @@ -69,6 +69,15 @@ bool Cursor::has_next() const return epoch < history->get_newest_epoch(); } +bool 
operator==(const Cursor& lhs, const Cursor& rhs) +{ + return lhs.history == rhs.history && lhs.epoch == rhs.epoch; +} + +bool operator!=(const Cursor& lhs, const Cursor& rhs) +{ + return !(lhs == rhs); +} class RGWPeriodHistory::Impl final { public: diff --git a/src/rgw/rgw_period_history.h b/src/rgw/rgw_period_history.h index 9541493aa14..0796c6116b5 100644 --- a/src/rgw/rgw_period_history.h +++ b/src/rgw/rgw_period_history.h @@ -75,6 +75,9 @@ class RGWPeriodHistory final { void prev() { epoch--; } void next() { epoch++; } + friend bool operator==(const Cursor& lhs, const Cursor& rhs); + friend bool operator!=(const Cursor& lhs, const Cursor& rhs); + private: // private constructors for RGWPeriodHistory friend class RGWPeriodHistory::Impl; diff --git a/src/rgw/rgw_period_pusher.cc b/src/rgw/rgw_period_pusher.cc index d6b2eabec39..75b454cf1c6 100644 --- a/src/rgw/rgw_period_pusher.cc +++ b/src/rgw/rgw_period_pusher.cc @@ -135,10 +135,11 @@ class RGWPeriodPusher::CRThread { std::map<std::string, RGWRESTConn>&& conns) : coroutines(cct, NULL), http(cct, coroutines.get_completion_mgr()), - push_all(new PushAllCR(cct, &http, std::move(period), std::move(conns))), - thread([this] { coroutines.run(push_all.get()); }) + push_all(new PushAllCR(cct, &http, std::move(period), std::move(conns))) { http.set_threaded(); + // must spawn the CR thread after set_threaded + thread = std::thread([this] { coroutines.run(push_all.get()); }); } ~CRThread() { diff --git a/src/rgw/rgw_process.h b/src/rgw/rgw_process.h index 83c59a4cc37..005f2db6fef 100644 --- a/src/rgw/rgw_process.h +++ b/src/rgw/rgw_process.h @@ -17,6 +17,8 @@ #include "common/WorkQueue.h" #include "common/Throttle.h" +#include <atomic> + #if !defined(dout_subsys) #define dout_subsys ceph_subsys_rgw #define def_dout_subsys @@ -182,7 +184,7 @@ public: void checkpoint(); void handle_request(RGWRequest* req) override; void gen_request(const string& method, const string& resource, - int content_length, atomic_t* fail_flag); 
+ int content_length, std::atomic<int64_t>* fail_flag); void set_access_key(RGWAccessKey& key) { access_key = key; } }; diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc index 02347ff743d..6a61400a110 100644 --- a/src/rgw/rgw_quota.cc +++ b/src/rgw/rgw_quota.cc @@ -26,6 +26,8 @@ #include "rgw_bucket.h" #include "rgw_user.h" +#include <atomic> + #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw @@ -410,7 +412,7 @@ void UserAsyncRefreshHandler::handle_response(int r) } class RGWUserStatsCache : public RGWQuotaCache<rgw_user> { - atomic_t down_flag; + std::atomic<bool> down_flag = { false }; RWLock rwlock; map<rgw_bucket, rgw_user> modified_buckets; @@ -569,11 +571,11 @@ public: } bool going_down() { - return (down_flag.read() != 0); + return down_flag; } void stop() { - down_flag.set(1); + down_flag = true; rwlock.get_write(); stop_thread(&buckets_sync_thread); rwlock.unlock(); diff --git a/src/rgw/rgw_quota.h b/src/rgw/rgw_quota.h index d55dcaff952..9291434634c 100644 --- a/src/rgw/rgw_quota.h +++ b/src/rgw/rgw_quota.h @@ -15,11 +15,11 @@ #ifndef CEPH_RGW_QUOTA_H #define CEPH_RGW_QUOTA_H - #include "include/utime.h" -#include "include/atomic.h" #include "common/lru_map.h" +#include <atomic> + static inline int64_t rgw_rounded_kb(int64_t bytes) { return (bytes + 1023) / 1024; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index eb4fbeda6a2..f3fb6dc2ea3 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -55,6 +55,7 @@ using namespace librados; #include <string> #include <iostream> #include <vector> +#include <atomic> #include <list> #include <map> #include "auth/Crypto.h" // get_random_bytes() @@ -71,14 +72,13 @@ using namespace librados; #include "compressor/Compressor.h" +#include <atomic> + #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw using namespace std; -static RGWCache<RGWRados> cached_rados_provider; -static RGWRados rados_provider; - static string notify_oid_prefix = "notify"; 
static string *notify_oids = NULL; static string shadow_ns = "shadow"; @@ -1428,33 +1428,62 @@ void RGWPeriod::fork() realm_epoch++; } -int RGWPeriod::update_sync_status() +static int read_sync_status(RGWRados *store, rgw_meta_sync_status *sync_status) { - // must be new period's master zone to write sync status - if (master_zone != store->get_zone_params().get_id()) { - ldout(cct, 0) << "my zone " << store->get_zone_params().get_id() - << " is not period's master zone " << master_zone << dendl; - return -EINVAL; + // initialize a sync status manager to read the status + RGWMetaSyncStatusManager mgr(store, store->get_async_rados()); + int r = mgr.init(); + if (r < 0) { + return r; } + r = mgr.read_sync_status(sync_status); + mgr.stop(); + return r; +} - auto mdlog = store->meta_mgr->get_log(get_id()); - const auto num_shards = cct->_conf->rgw_md_log_max_shards; +int RGWPeriod::update_sync_status(const RGWPeriod &current_period, + std::ostream& error_stream, + bool force_if_stale) +{ + rgw_meta_sync_status status; + int r = read_sync_status(store, &status); + if (r < 0) { + ldout(cct, 0) << "period failed to read sync status: " + << cpp_strerror(-r) << dendl; + return r; + } std::vector<std::string> markers; - markers.reserve(num_shards); - // gather the markers for each shard - // TODO: use coroutines to read them in parallel - for (int i = 0; i < num_shards; i++) { - RGWMetadataLogInfo info; - int r = mdlog->get_info(i, &info); - if (r < 0) { - ldout(cct, 0) << "period failed to get metadata log info for shard " << i - << ": " << cpp_strerror(-r) << dendl; - return r; + const auto current_epoch = current_period.get_realm_epoch(); + if (current_epoch != status.sync_info.realm_epoch) { + // no sync status markers for the current period + assert(current_epoch > status.sync_info.realm_epoch); + const int behind = current_epoch - status.sync_info.realm_epoch; + if (!force_if_stale && current_epoch > 1) { + error_stream << "ERROR: This zone is " << behind << " period(s) 
behind " + "the current master zone in metadata sync. If this zone is promoted " + "to master, any metadata changes during that time are likely to " + "be lost.\n" + "Waiting for this zone to catch up on metadata sync (see " + "'radosgw-admin sync status') is recommended.\n" + "To promote this zone to master anyway, add the flag " + "--yes-i-really-mean-it." << std::endl; + return -EINVAL; + } + // empty sync status markers - other zones will skip this period during + // incremental metadata sync + markers.resize(status.sync_info.num_shards); + } else { + markers.reserve(status.sync_info.num_shards); + for (auto& i : status.sync_markers) { + auto& marker = i.second; + // filter out markers from other periods + if (marker.realm_epoch != current_epoch) { + marker.marker.clear(); + } + markers.emplace_back(std::move(marker.marker)); } - ldout(cct, 15) << "got shard " << i << " marker " << info.marker << dendl; - markers.emplace_back(std::move(info.marker)); } std::swap(sync_status, markers); @@ -1462,7 +1491,7 @@ int RGWPeriod::update_sync_status() } int RGWPeriod::commit(RGWRealm& realm, const RGWPeriod& current_period, - std::ostream& error_stream) + std::ostream& error_stream, bool force_if_stale) { ldout(cct, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl; // gateway must be in the master zone to commit @@ -1492,7 +1521,7 @@ int RGWPeriod::commit(RGWRealm& realm, const RGWPeriod& current_period, // did the master zone change? 
if (master_zone != current_period.get_master_zone()) { // store the current metadata sync status in the period - int r = update_sync_status(); + int r = update_sync_status(current_period, error_stream, force_if_stale); if (r < 0) { ldout(cct, 0) << "failed to update metadata sync status: " << cpp_strerror(-r) << dendl; @@ -2356,9 +2385,9 @@ void RGWObjVersionTracker::generate_new_write_ver(CephContext *cct) int RGWPutObjProcessor::complete(size_t accounted_size, const string& etag, real_time *mtime, real_time set_mtime, map<string, bufferlist>& attrs, real_time delete_at, - const char *if_match, const char *if_nomatch) + const char *if_match, const char *if_nomatch, const string *user_data) { - int r = do_complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, if_match, if_nomatch); + int r = do_complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, if_match, if_nomatch, user_data); if (r < 0) return r; @@ -2531,6 +2560,9 @@ int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phan *pobj = cur_obj; + if (!bl.length()) + return 0; + return RGWPutObjProcessor_Aio::handle_obj_data(cur_obj, bl, ofs - cur_part_ofs, ofs, phandle, exclusive); } @@ -2700,7 +2732,7 @@ int RGWPutObjProcessor_Atomic::do_complete(size_t accounted_size, const string& map<string, bufferlist>& attrs, real_time delete_at, const char *if_match, - const char *if_nomatch) { + const char *if_nomatch, const string *user_data) { int r = complete_writing_data(); if (r < 0) return r; @@ -2725,6 +2757,8 @@ int RGWPutObjProcessor_Atomic::do_complete(size_t accounted_size, const string& obj_op.meta.flags = PUT_OBJ_CREATE; obj_op.meta.olh_epoch = olh_epoch; obj_op.meta.delete_at = delete_at; + obj_op.meta.user_data = user_data; + r = obj_op.write_meta(obj_len, accounted_size, attrs); if (r < 0) { return r; @@ -2927,7 +2961,7 @@ protected: CephContext *cct; RGWRados *store; - atomic_t down_flag; + std::atomic<bool> down_flag = { false }; string thread_name; @@ 
-2943,7 +2977,8 @@ public: virtual int init() { return 0; } virtual int process() = 0; - bool going_down() { return down_flag.read() != 0; } + bool going_down() { return down_flag; } + void start(); void stop(); }; @@ -2956,7 +2991,7 @@ void RGWRadosThread::start() void RGWRadosThread::stop() { - down_flag.set(1); + down_flag = true; stop_process(); if (worker) { worker->stop(); @@ -3194,9 +3229,20 @@ public: return http.set_threaded(); } int process() override { - crs.run(create_data_log_trim_cr(store, &http, - cct->_conf->rgw_data_log_num_shards, - trim_interval)); + list<RGWCoroutinesStack*> stacks; + auto meta = new RGWCoroutinesStack(store->ctx(), &crs); + meta->call(create_meta_log_trim_cr(store, &http, + cct->_conf->rgw_md_log_max_shards, + trim_interval)); + stacks.push_back(meta); + + auto data = new RGWCoroutinesStack(store->ctx(), &crs); + data->call(create_data_log_trim_cr(store, &http, + cct->_conf->rgw_data_log_num_shards, + trim_interval)); + stacks.push_back(data); + + crs.run(stacks); return 0; } }; @@ -4129,7 +4175,8 @@ int RGWRados::init_complete() /* no point of running sync thread if we don't have a master zone configured or there is no rest_master_conn */ - if (get_zonegroup().master_zone.empty() || !rest_master_conn) { + if (get_zonegroup().master_zone.empty() || !rest_master_conn + || current_period.get_id().empty()) { run_sync_thread = false; } @@ -5182,14 +5229,14 @@ int RGWRados::Bucket::List::list_objects(int max, vector<rgw_bucket_dir_entry> * if (delim_pos >= 0) { string s = cur_marker.name.substr(0, delim_pos); s.append(bigger_than_delim); - cur_marker.set(s); + cur_marker = s; } } string skip_after_delim; while (truncated && count <= max) { if (skip_after_delim > cur_marker.name) { - cur_marker.set(skip_after_delim); + cur_marker = skip_after_delim; ldout(cct, 20) << "setting cur_marker=" << cur_marker.name << "[" << cur_marker.instance << "]" << dendl; } std::map<string, rgw_bucket_dir_entry> ent_map; @@ -5370,7 +5417,7 @@ int 
RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket, for (int i = 0; i < MAX_CREATE_RETRIES; i++) { int ret = 0; - ret = select_bucket_placement(owner, zonegroup_id, placement_rule, bucket, + ret = select_bucket_placement(owner, zonegroup_id, placement_rule, &selected_placement_rule_name, &rule_info); if (ret < 0) return ret; @@ -5414,7 +5461,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket, info.quota = *pquota_info; } - int r = init_bucket_index(info, bucket_index_max_shards); + int r = init_bucket_index(info, info.num_shards); if (r < 0) { return r; } @@ -5465,7 +5512,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket, } int RGWRados::select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& request_rule, - rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info) + string *pselected_rule_name, RGWZonePlacementInfo *rule_info) { /* first check that rule exists within the specific zonegroup */ @@ -5506,16 +5553,16 @@ int RGWRados::select_new_bucket_location(RGWUserInfo& user_info, const string& z if (pselected_rule_name) *pselected_rule_name = rule; - return select_bucket_location_by_rule(rule, bucket, rule_info); + return select_bucket_location_by_rule(rule, rule_info); } -int RGWRados::select_bucket_location_by_rule(const string& location_rule, rgw_bucket& bucket, RGWZonePlacementInfo *rule_info) +int RGWRados::select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info) { if (location_rule.empty()) { /* we can only reach here if we're trying to set a bucket location from a bucket * created on a different zone, using a legacy / default pool configuration */ - return select_legacy_bucket_placement(bucket, rule_info); + return select_legacy_bucket_placement(rule_info); } /* @@ -5546,21 +5593,21 @@ int RGWRados::select_bucket_location_by_rule(const string& location_rule, rgw_bu } int 
RGWRados::select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& placement_rule, - rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info) + string *pselected_rule_name, RGWZonePlacementInfo *rule_info) { if (!get_zone_params().placement_pools.empty()) { return select_new_bucket_location(user_info, zonegroup_id, placement_rule, - bucket, pselected_rule_name, rule_info); + pselected_rule_name, rule_info); } if (pselected_rule_name) { pselected_rule_name->clear(); } - return select_legacy_bucket_placement(bucket, rule_info); + return select_legacy_bucket_placement(rule_info); } -int RGWRados::select_legacy_bucket_placement(rgw_bucket& bucket, RGWZonePlacementInfo *rule_info) +int RGWRados::select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info) { bufferlist map_bl; map<string, bufferlist> m; @@ -6224,7 +6271,6 @@ int RGWRados::swift_versioning_copy(RGWObjectCtx& obj_ctx, NULL, /* string *version_id */ NULL, /* string *ptag */ NULL, /* string *petag */ - NULL, /* struct rgw_err *err */ NULL, /* void (*progress_cb)(off_t, void *) */ NULL); /* void *progress_data */ if (r == -ECANCELED || r == -ENOENT) { @@ -6314,7 +6360,6 @@ int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx, nullptr, /* string *version_id */ nullptr, /* string *ptag */ nullptr, /* string *petag */ - nullptr, /* struct rgw_err *err */ nullptr, /* void (*progress_cb)(off_t, void *) */ nullptr); /* void *progress_data */ if (ret == -ECANCELED || ret == -ENOENT) { @@ -6504,7 +6549,7 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si r = index_op->complete(poolid, epoch, size, accounted_size, meta.set_mtime, etag, content_type, &acl_bl, - meta.category, meta.remove_objs); + meta.category, meta.remove_objs, meta.user_data); if (r < 0) goto done_cancel; @@ -6674,7 +6719,8 @@ int RGWRados::put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, real_time *mt } int RGWRados::put_system_obj_data(void 
*ctx, rgw_raw_obj& obj, bufferlist& bl, - off_t ofs, bool exclusive) + off_t ofs, bool exclusive, + RGWObjVersionTracker *objv_tracker) { rgw_rados_ref ref; rgw_pool pool; @@ -6688,6 +6734,9 @@ int RGWRados::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, if (exclusive) op.create(true); + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } if (ofs == -1) { op.write_full(bl); } else { @@ -6697,6 +6746,9 @@ int RGWRados::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, if (r < 0) return r; + if (objv_tracker) { + objv_tracker->apply_write(); + } return 0; } @@ -6952,7 +7004,7 @@ int RGWRados::rewrite_obj(RGWBucketInfo& dest_bucket_info, rgw_obj& obj) } return copy_obj_data(rctx, dest_bucket_info, read_op, obj_size - 1, obj, obj, max_chunk_size, NULL, mtime, attrset, - RGW_OBJ_CATEGORY_MAIN, 0, real_time(), NULL, NULL, NULL, NULL); + RGW_OBJ_CATEGORY_MAIN, 0, real_time(), NULL, NULL, NULL); } struct obj_time_weight { @@ -7171,7 +7223,6 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, string *version_id, string *ptag, ceph::buffer::list *petag, - struct rgw_err *err, void (*progress_cb)(off_t, void *), void *progress_data) { @@ -7469,7 +7520,6 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, string *version_id, string *ptag, ceph::buffer::list *petag, - struct rgw_err *err, void (*progress_cb)(off_t, void *), void *progress_data) { @@ -7499,7 +7549,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, dest_obj, src_obj, dest_bucket_info, src_bucket_info, src_mtime, mtime, mod_ptr, unmod_ptr, high_precision_time, if_match, if_nomatch, attrs_mod, copy_if_newer, attrs, category, - olh_epoch, delete_at, version_id, ptag, petag, err, progress_cb, progress_data); + olh_epoch, delete_at, version_id, ptag, petag, progress_cb, progress_data); } map<string, bufferlist> src_attrs; @@ -7514,7 +7564,6 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, read_op.params.attrs = &src_attrs; read_op.params.lastmod = src_mtime; 
read_op.params.obj_size = &obj_size; - read_op.params.perr = err; ret = read_op.prepare(); if (ret < 0) { @@ -7594,7 +7643,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, if (copy_data) { /* refcounting tail wouldn't work here, just copy the data */ return copy_obj_data(obj_ctx, dest_bucket_info, read_op, obj_size - 1, dest_obj, src_obj, max_chunk_size, mtime, real_time(), attrs, category, olh_epoch, delete_at, - version_id, ptag, petag, err); + version_id, ptag, petag); } RGWObjManifest::obj_iterator miter = astate->manifest.obj_begin(); @@ -7730,8 +7779,7 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, real_time delete_at, string *version_id, string *ptag, - ceph::buffer::list *petag, - struct rgw_err *err) + ceph::buffer::list *petag) { bufferlist first_chunk; RGWObjManifest manifest; @@ -9460,7 +9508,7 @@ int RGWRados::Bucket::UpdateIndex::complete(int64_t poolid, uint64_t epoch, const string& content_type, bufferlist *acl_bl, RGWObjCategory category, - list<rgw_obj_index_key> *remove_objs) + list<rgw_obj_index_key> *remove_objs, const string *user_data) { if (blind) { return 0; @@ -9479,6 +9527,9 @@ int RGWRados::Bucket::UpdateIndex::complete(int64_t poolid, uint64_t epoch, ent.meta.accounted_size = accounted_size; ent.meta.mtime = ut; ent.meta.etag = etag; + if (user_data) + ent.meta.user_data = *user_data; + ACLOwner owner; if (acl_bl && acl_bl->length()) { int ret = store->decode_policy(*acl_bl, &owner); @@ -9760,8 +9811,8 @@ struct get_obj_data : public RefCountedObject { Mutex data_lock; list<get_obj_aio_data> aio_data; RGWGetDataCB *client_cb; - atomic_t cancelled; - atomic_t err_code; + std::atomic<bool> cancelled = { false }; + std::atomic<int64_t> err_code = { 0 }; Throttle throttle; list<bufferlist> read_list; @@ -9773,16 +9824,16 @@ struct get_obj_data : public RefCountedObject { throttle(cct, "get_obj_data", cct->_conf->rgw_get_obj_window_size, false) {} ~get_obj_data() override { } void set_cancelled(int r) { - cancelled.set(1); - 
err_code.set(r); + cancelled = true; + err_code = r; } bool is_cancelled() { - return cancelled.read() == 1; + return cancelled; } int get_err_code() { - return err_code.read(); + return err_code; } int wait_next_io(bool *done) { @@ -11626,7 +11677,7 @@ int RGWRados::pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_ if (filter && !filter->filter(oid, oid)) continue; - e.key.set(oid); + e.key = oid; objs.push_back(e); } diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index e4947298e62..e60ccbdf0be 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -1787,7 +1787,8 @@ class RGWPeriod const string get_period_oid_prefix(); // gather the metadata sync status for each shard; only for use on master zone - int update_sync_status(); + int update_sync_status(const RGWPeriod &current_period, + std::ostream& error_stream, bool force_if_stale); public: RGWPeriod() : epoch(0), cct(NULL), store(NULL) {} @@ -1860,7 +1861,7 @@ public: // commit a staging period; only for use on master zone int commit(RGWRealm& realm, const RGWPeriod &current_period, - std::ostream& error_stream); + std::ostream& error_stream, bool force_if_stale = false); void encode(bufferlist& bl) const { ENCODE_START(1, 1, bl); @@ -2198,7 +2199,7 @@ class RGWRados void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result); - atomic64_t max_req_id; + std::atomic<int64_t> max_req_id = { 0 }; Mutex lock; Mutex watchers_lock; SafeTimer *timer; @@ -2297,7 +2298,7 @@ protected: RGWPeriod current_period; public: - RGWRados() : max_req_id(0), lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL), + RGWRados() : lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL), gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false), run_sync_thread(false), async_rados(nullptr), meta_notifier(NULL), data_notifier(NULL), meta_sync_processor_thread(NULL), @@ -2319,7 +2320,7 @@ public: 
meta_mgr(NULL), data_log(NULL) {} uint64_t get_new_req_id() { - return max_req_id.inc(); + return ++max_req_id; } librados::IoCtx* get_lc_pool_ctx() { @@ -2528,11 +2529,11 @@ public: */ int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards); int select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule, - rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info); - int select_legacy_bucket_placement(rgw_bucket& bucket, RGWZonePlacementInfo *rule_info); + string *pselected_rule_name, RGWZonePlacementInfo *rule_info); + int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info); int select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule, - rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info); - int select_bucket_location_by_rule(const string& location_rule, rgw_bucket& bucket, RGWZonePlacementInfo *rule_info); + string *pselected_rule_name, RGWZonePlacementInfo *rule_info); + int select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info); void create_bucket_id(string *bucket_id); bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool); @@ -2594,9 +2595,8 @@ public: ceph::real_time *lastmod; uint64_t *obj_size; map<string, bufferlist> *attrs; - struct rgw_err *perr; - StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL), perr(NULL) {} + StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL) {} } stat_params; struct ReadParams { @@ -2707,9 +2707,8 @@ public: ceph::real_time *lastmod; uint64_t *obj_size; map<string, bufferlist> *attrs; - struct rgw_err *perr; - Params() : lastmod(NULL), obj_size(NULL), attrs(NULL), perr(NULL) {} + Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {} } params; explicit Read(RGWRados::Object *_source) : source(_source) {} @@ -2740,10 +2739,11 @@ public: uint64_t olh_epoch; ceph::real_time delete_at; bool 
canceled; + const string *user_data; MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL), remove_objs(NULL), category(RGW_OBJ_CATEGORY_MAIN), flags(0), - if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false) {} + if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false), user_data(nullptr) {} } meta; explicit Write(RGWRados::Object *_target) : target(_target) {} @@ -2876,7 +2876,7 @@ public: uint64_t accounted_size, ceph::real_time& ut, const string& etag, const string& content_type, bufferlist *acl_bl, RGWObjCategory category, - list<rgw_obj_index_key> *remove_objs); + list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr); int complete_del(int64_t poolid, uint64_t epoch, ceph::real_time& removed_mtime, /* mtime of removed object */ list<rgw_obj_index_key> *remove_objs); @@ -2922,7 +2922,8 @@ public: ceph::real_time set_mtime /* 0 for don't set */); virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, - off_t ofs, bool exclusive); + off_t ofs, bool exclusive, + RGWObjVersionTracker *objv_tracker = nullptr); int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, off_t ofs, bool exclusive, void **handle); @@ -3020,7 +3021,6 @@ public: string *version_id, string *ptag, ceph::buffer::list *petag, - struct rgw_err *err, void (*progress_cb)(off_t, void *), void *progress_data); /** @@ -3035,7 +3035,6 @@ public: * parameter, source object attributes are not copied; * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes * are overwritten by values contained in attrs parameter. - * err: stores any errors resulting from the get of the original object * Returns: 0 on success, -ERR# otherwise. 
*/ int copy_obj(RGWObjectCtx& obj_ctx, @@ -3064,7 +3063,6 @@ public: string *version_id, string *ptag, ceph::buffer::list *petag, - struct rgw_err *err, void (*progress_cb)(off_t, void *), void *progress_data); @@ -3082,8 +3080,7 @@ public: ceph::real_time delete_at, string *version_id, string *ptag, - ceph::buffer::list *petag, - struct rgw_err *err); + ceph::buffer::list *petag); int check_bucket_empty(RGWBucketInfo& bucket_info); @@ -3655,7 +3652,7 @@ protected: virtual int do_complete(size_t accounted_size, const string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, map<string, bufferlist>& attrs, ceph::real_time delete_at, - const char *if_match, const char *if_nomatch) = 0; + const char *if_match, const char *if_nomatch, const string *user_data) = 0; public: RGWPutObjProcessor(RGWObjectCtx& _obj_ctx, RGWBucketInfo& _bi) : store(NULL), @@ -3672,7 +3669,7 @@ public: int complete(size_t accounted_size, const string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, map<string, bufferlist>& attrs, ceph::real_time delete_at, - const char *if_match = NULL, const char *if_nomatch = NULL); + const char *if_match = NULL, const char *if_nomatch = NULL, const string *user_data = nullptr); CephContext *ctx(); @@ -3750,7 +3747,7 @@ protected: int do_complete(size_t accounted_size, const string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, map<string, bufferlist>& attrs, ceph::real_time delete_at, - const char *if_match, const char *if_nomatch) override; + const char *if_match, const char *if_nomatch, const string *user_data) override; int prepare_next_part(off_t ofs); int complete_parts(); @@ -3865,7 +3862,7 @@ protected: int do_complete(size_t accounted_size, const string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, map<string, bufferlist>& attrs, ceph::real_time delete_at, - const char *if_match, const char *if_nomatch) override; + const char *if_match, const char *if_nomatch, const string *user_data) override; public: bool 
immutable_head() { return true; } RGWPutObjProcessor_Multipart(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, uint64_t _p, req_state *_s) : diff --git a/src/rgw/rgw_realm_reloader.cc b/src/rgw/rgw_realm_reloader.cc index a1d17831798..8bd65b45d9f 100644 --- a/src/rgw/rgw_realm_reloader.cc +++ b/src/rgw/rgw_realm_reloader.cc @@ -64,12 +64,10 @@ void RGWRealmReloader::handle_notify(RGWRealmNotify type, reload_scheduled = new C_Reload(this); cond.SignalOne(); // wake reload() if it blocked on a bad configuration - // schedule reload() with a delay so we can batch up changes - auto delay = cct->_conf->rgw_realm_reconfigure_delay; - timer.add_event_after(delay, reload_scheduled); + // schedule reload() without delay + timer.add_event_after(0, reload_scheduled); - ldout(cct, 4) << "Notification on realm, reconfiguration scheduled in " - << delay << 's' << dendl; + ldout(cct, 4) << "Notification on realm, reconfiguration scheduled" << dendl; } void RGWRealmReloader::reload() diff --git a/src/rgw/rgw_realm_reloader.h b/src/rgw/rgw_realm_reloader.h index 3de54b1aabc..e4e3a436342 100644 --- a/src/rgw/rgw_realm_reloader.h +++ b/src/rgw/rgw_realm_reloader.h @@ -10,8 +10,8 @@ class RGWRados; /** - * RGWRealmReloader responds to notifications by recreating RGWRados with the - * updated realm configuration. + * RGWRealmReloader responds to new period notifications by recreating RGWRados + * with the updated realm configuration. */ class RGWRealmReloader : public RGWRealmWatcher::Watcher { public: @@ -20,8 +20,7 @@ class RGWRealmReloader : public RGWRealmWatcher::Watcher { * is required to ensure that they stop issuing requests on the old * RGWRados instance, and restart with the updated configuration. * - * This abstraction avoids a depency on class RGWFrontend, which is only - * defined in rgw_main.cc + * This abstraction avoids a depency on class RGWFrontend. 
*/ class Pauser { public: @@ -50,9 +49,9 @@ class RGWRealmReloader : public RGWRealmWatcher::Watcher { Pauser *const frontends; /// reload() takes a significant amount of time, so we don't want to run - /// it in the handle_notify() thread. we choose a timer thread because we - /// also want to add a delay (see rgw_realm_reconfigure_delay) so that we - /// can batch up notifications within that window + /// it in the handle_notify() thread. we choose a timer thread instead of a + /// Finisher because it allows us to cancel events that were scheduled while + /// reload() is still running SafeTimer timer; Mutex mutex; //< protects access to timer and reload_scheduled Cond cond; //< to signal reload() after an invalid realm config diff --git a/src/rgw/rgw_replica_log.h b/src/rgw/rgw_replica_log.h index 45cac52d68c..699600c0fe4 100644 --- a/src/rgw/rgw_replica_log.h +++ b/src/rgw/rgw_replica_log.h @@ -22,8 +22,6 @@ class RGWRados; class CephContext; -using namespace std; - #define META_REPLICA_LOG_OBJ_PREFIX "meta.replicalog." #define DATA_REPLICA_LOG_OBJ_PREFIX "data.replicalog." 
diff --git a/src/rgw/rgw_request.h b/src/rgw/rgw_request.h index d9fc69bee1c..3c835f7b1c9 100644 --- a/src/rgw/rgw_request.h +++ b/src/rgw/rgw_request.h @@ -12,8 +12,11 @@ #if defined(WITH_RADOSGW_FCGI_FRONTEND) #include "rgw_fcgi.h" #endif + #include "common/QueueRing.h" +#include <atomic> + struct RGWRequest { uint64_t id; @@ -56,10 +59,10 @@ struct RGWLoadGenRequest : public RGWRequest { string method; string resource; int content_length; - atomic_t* fail_flag; + std::atomic<int64_t>* fail_flag = nullptr; RGWLoadGenRequest(uint64_t req_id, const string& _m, const string& _r, int _cl, - atomic_t *ff) + std::atomic<int64_t> *ff) : RGWRequest(req_id), method(_m), resource(_r), content_length(_cl), fail_flag(ff) {} }; diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc index 175ceb9606b..140d434e737 100644 --- a/src/rgw/rgw_rest.cc +++ b/src/rgw/rgw_rest.cc @@ -1,6 +1,7 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab + #include <errno.h> #include <limits.h> @@ -18,8 +19,6 @@ #include "rgw_rest_s3.h" #include "rgw_swift_auth.h" #include "rgw_cors_s3.h" -#include "rgw_http_errors.h" -#include "rgw_lib.h" #include "rgw_client_io.h" #include "rgw_resolve.h" @@ -28,6 +27,53 @@ #define dout_subsys ceph_subsys_rgw +struct rgw_http_status_code { + int code; + const char *name; +}; + +const static struct rgw_http_status_code http_codes[] = { + { 100, "Continue" }, + { 200, "OK" }, + { 201, "Created" }, + { 202, "Accepted" }, + { 204, "No Content" }, + { 205, "Reset Content" }, + { 206, "Partial Content" }, + { 207, "Multi Status" }, + { 208, "Already Reported" }, + { 300, "Multiple Choices" }, + { 301, "Moved Permanently" }, + { 302, "Found" }, + { 303, "See Other" }, + { 304, "Not Modified" }, + { 305, "User Proxy" }, + { 306, "Switch Proxy" }, + { 307, "Temporary Redirect" }, + { 308, "Permanent Redirect" }, + { 400, "Bad Request" }, + { 401, "Unauthorized" }, + { 402, "Payment Required" }, + { 403, "Forbidden" }, + { 
404, "Not Found" }, + { 405, "Method Not Allowed" }, + { 406, "Not Acceptable" }, + { 407, "Proxy Authentication Required" }, + { 408, "Request Timeout" }, + { 409, "Conflict" }, + { 410, "Gone" }, + { 411, "Length Required" }, + { 412, "Precondition Failed" }, + { 413, "Request Entity Too Large" }, + { 414, "Request-URI Too Long" }, + { 415, "Unsupported Media Type" }, + { 416, "Requested Range Not Satisfiable" }, + { 417, "Expectation Failed" }, + { 422, "Unprocessable Entity" }, + { 500, "Internal Server Error" }, + { 501, "Not Implemented" }, + { 0, NULL }, +}; struct rgw_http_attr { const char *rgw_attr; @@ -309,45 +355,6 @@ void rgw_flush_formatter(struct req_state *s, Formatter *formatter) } } -void set_req_state_err(struct rgw_err& err, /* out */ - int err_no, /* in */ - const int prot_flags) /* in */ -{ - const struct rgw_http_errors *r; - - if (err_no < 0) - err_no = -err_no; - err.ret = -err_no; - if (prot_flags & RGW_REST_SWIFT) { - r = search_err(err_no, RGW_HTTP_SWIFT_ERRORS, - ARRAY_LEN(RGW_HTTP_SWIFT_ERRORS)); - if (r) { - err.http_ret = r->http_ret; - err.s3_code = r->s3_code; - return; - } - } - - r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS)); - if (r) { - err.http_ret = r->http_ret; - err.s3_code = r->s3_code; - return; - } - dout(0) << "WARNING: set_req_state_err err_no=" << err_no - << " resorting to 500" << dendl; - - err.http_ret = 500; - err.s3_code = "UnknownError"; -} - -void set_req_state_err(struct req_state * const s, const int err_no) -{ - if (s) { - set_req_state_err(s->err, err_no, s->prot_flags); - } -} - void dump_errno(int http_ret, string& out) { stringstream ss; @@ -658,7 +665,7 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type, dump_trans_id(s); - if ((!s->err.is_err()) && + if ((!s->is_err()) && (s->bucket_info.owner != s->user->user_id) && (s->bucket_info.requester_pays)) { dump_header(s, "x-amz-request-charged", "requester"); @@ -675,7 +682,7 @@ void end_header(struct 
req_state* s, RGWOp* op, const char *content_type, /* do not send content type if content length is zero and the content type was not set by the user */ if (force_content_type || - (!content_type && s->formatter->get_len() != 0) || s->err.is_err()){ + (!content_type && s->formatter->get_len() != 0) || s->is_err()){ switch (s->format) { case RGW_FORMAT_XML: ctype = "application/xml"; @@ -694,24 +701,9 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type, ctype.append("; charset=utf-8"); content_type = ctype.c_str(); } - if (!force_no_error && s->err.is_err()) { + if (!force_no_error && s->is_err()) { dump_start(s); - if (s->format != RGW_FORMAT_HTML) { - s->formatter->open_object_section("Error"); - } - if (!s->err.s3_code.empty()) - s->formatter->dump_string("Code", s->err.s3_code); - if (!s->err.message.empty()) - s->formatter->dump_string("Message", s->err.message); - if (!s->bucket_name.empty()) // TODO: connect to expose_bucket - s->formatter->dump_string("BucketName", s->bucket_name); - if (!s->trans_id.empty()) // TODO: connect to expose_bucket or another toggle - s->formatter->dump_string("RequestId", s->trans_id); - s->formatter->dump_string("HostId", s->host_id); - if (s->format != RGW_FORMAT_HTML) { - s->formatter->close_section(); - } - s->formatter->output_footer(); + dump(s); dump_content_length(s, s->formatter->get_len()); } else { if (proposed_content_length == CHUNKED_TRANSFER_ENCODING) { @@ -736,8 +728,8 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type, rgw_flush_formatter_and_reset(s, s->formatter); } -void abort_early(struct req_state *s, RGWOp *op, int err_no, - RGWHandler* handler) +void abort_early(struct req_state *s, RGWOp* op, int err_no, + RGWHandler* handler) { string error_content(""); if (!s->formatter) { @@ -764,12 +756,10 @@ void abort_early(struct req_state *s, RGWOp *op, int err_no, // returned 0. If non-zero, we need to continue here. 
if (err_no) { // Watch out, we might have a custom error state already set! - if (s->err.http_ret && s->err.http_ret != 200) { - dump_errno(s); - } else { + if (!s->err.http_ret || s->err.http_ret == 200) { set_req_state_err(s, err_no); - dump_errno(s); } + dump_errno(s); dump_bucket_from_state(s); if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) { string dest_uri; @@ -1236,6 +1226,308 @@ int RGWPutObj_ObjStore::get_data(bufferlist& bl) return len; } + +/* + * parses params in the format: 'first; param1=foo; param2=bar' + */ +void RGWPostObj_ObjStore::parse_boundary_params(const std::string& params_str, + std::string& first, + std::map<std::string, + std::string>& params) +{ + size_t pos = params_str.find(';'); + if (std::string::npos == pos) { + first = rgw_trim_whitespace(params_str); + return; + } + + first = rgw_trim_whitespace(params_str.substr(0, pos)); + pos++; + + while (pos < params_str.size()) { + size_t end = params_str.find(';', pos); + if (std::string::npos == end) { + end = params_str.size(); + } + + std::string param = params_str.substr(pos, end - pos); + size_t eqpos = param.find('='); + + if (std::string::npos != eqpos) { + std::string param_name = rgw_trim_whitespace(param.substr(0, eqpos)); + std::string val = rgw_trim_quotes(param.substr(eqpos + 1)); + params[std::move(param_name)] = std::move(val); + } else { + params[rgw_trim_whitespace(param)] = ""; + } + + pos = end + 1; + } +} + +int RGWPostObj_ObjStore::parse_part_field(const std::string& line, + std::string& field_name, /* out */ + post_part_field& field) /* out */ +{ + size_t pos = line.find(':'); + if (pos == string::npos) + return -EINVAL; + + field_name = line.substr(0, pos); + if (pos >= line.size() - 1) + return 0; + + parse_boundary_params(line.substr(pos + 1), field.val, field.params); + + return 0; +} + +static bool is_crlf(const char *s) +{ + return (*s == '\r' && *(s + 1) == '\n'); +} + +/* + * find the index of the boundary, if exists, or optionally 
the next end of line + * also returns how many bytes to skip + */ +static int index_of(ceph::bufferlist& bl, + uint64_t max_len, + const std::string& str, + const bool check_crlf, + bool& reached_boundary, + int& skip) +{ + reached_boundary = false; + skip = 0; + + if (str.size() < 2) // we assume boundary is at least 2 chars (makes it easier with crlf checks) + return -EINVAL; + + if (bl.length() < str.size()) + return -1; + + const char *buf = bl.c_str(); + const char *s = str.c_str(); + + if (max_len > bl.length()) + max_len = bl.length(); + + for (uint64_t i = 0; i < max_len; i++, buf++) { + if (check_crlf && + i >= 1 && + is_crlf(buf - 1)) { + return i + 1; // skip the crlf + } + if ((i < max_len - str.size() + 1) && + (buf[0] == s[0] && buf[1] == s[1]) && + (strncmp(buf, s, str.size()) == 0)) { + reached_boundary = true; + skip = str.size(); + + /* oh, great, now we need to swallow the preceding crlf + * if exists + */ + if ((i >= 2) && + is_crlf(buf - 2)) { + i -= 2; + skip += 2; + } + return i; + } + } + + return -1; +} + +int RGWPostObj_ObjStore::read_with_boundary(ceph::bufferlist& bl, + uint64_t max, + const bool check_crlf, + bool& reached_boundary, + bool& done) +{ + uint64_t cl = max + 2 + boundary.size(); + + if (max > in_data.length()) { + uint64_t need_to_read = cl - in_data.length(); + + bufferptr bp(need_to_read); + + const auto read_len = recv_body(s, bp.c_str(), need_to_read); + if (read_len < 0) { + return read_len; + } + in_data.append(bp, 0, read_len); + } + + done = false; + int skip; + const int index = index_of(in_data, cl, boundary, check_crlf, + reached_boundary, skip); + if (index >= 0) { + max = index; + } + + if (max > in_data.length()) { + max = in_data.length(); + } + + bl.substr_of(in_data, 0, max); + + ceph::bufferlist new_read_data; + + /* + * now we need to skip boundary for next time, also skip any crlf, or + * check to see if it's the last final boundary (marked with "--" at the end + */ + if (reached_boundary) { + int left = 
in_data.length() - max; + if (left < skip + 2) { + int need = skip + 2 - left; + bufferptr boundary_bp(need); + const int r = recv_body(s, boundary_bp.c_str(), need); + if (r < 0) { + return r; + } + in_data.append(boundary_bp); + } + max += skip; // skip boundary for next time + if (in_data.length() >= max + 2) { + const char *data = in_data.c_str(); + if (is_crlf(data + max)) { + max += 2; + } else { + if (*(data + max) == '-' && + *(data + max + 1) == '-') { + done = true; + max += 2; + } + } + } + } + + new_read_data.substr_of(in_data, max, in_data.length() - max); + in_data = new_read_data; + + return 0; +} + +int RGWPostObj_ObjStore::read_line(ceph::bufferlist& bl, + const uint64_t max, + bool& reached_boundary, + bool& done) +{ + return read_with_boundary(bl, max, true, reached_boundary, done); +} + +int RGWPostObj_ObjStore::read_data(ceph::bufferlist& bl, + const uint64_t max, + bool& reached_boundary, + bool& done) +{ + return read_with_boundary(bl, max, false, reached_boundary, done); +} + + +int RGWPostObj_ObjStore::read_form_part_header(struct post_form_part* const part, + bool& done) +{ + bufferlist bl; + bool reached_boundary; + uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size; + int r = read_line(bl, chunk_size, reached_boundary, done); + if (r < 0) { + return r; + } + + if (done) { + return 0; + } + + if (reached_boundary) { // skip the first boundary + r = read_line(bl, chunk_size, reached_boundary, done); + if (r < 0) { + return r; + } else if (done) { + return 0; + } + } + + while (true) { + /* + * iterate through fields + */ + std::string line = rgw_trim_whitespace(string(bl.c_str(), bl.length())); + + if (line.empty()) { + break; + } + + struct post_part_field field; + + string field_name; + r = parse_part_field(line, field_name, field); + if (r < 0) { + return r; + } + + part->fields[field_name] = field; + + if (stringcasecmp(field_name, "Content-Disposition") == 0) { + part->name = field.params["name"]; + } + + if (reached_boundary) { 
+ break; + } + + r = read_line(bl, chunk_size, reached_boundary, done); + } + + return 0; +} + +bool RGWPostObj_ObjStore::part_str(parts_collection_t& parts, + const std::string& name, + std::string* val) +{ + const auto iter = parts.find(name); + if (std::end(parts) == iter) { + return false; + } + + ceph::bufferlist& data = iter->second.data; + std::string str = string(data.c_str(), data.length()); + *val = rgw_trim_whitespace(str); + return true; +} + +std::string RGWPostObj_ObjStore::get_part_str(parts_collection_t& parts, + const std::string& name, + const std::string& def_val) +{ + std::string val; + + if (part_str(parts, name, &val)) { + return val; + } else { + return rgw_trim_whitespace(def_val); + } +} + +bool RGWPostObj_ObjStore::part_bl(parts_collection_t& parts, + const std::string& name, + ceph::bufferlist* pbl) +{ + const auto iter = parts.find(name); + if (std::end(parts) == iter) { + return false; + } + + *pbl = iter->second.data; + return true; +} + int RGWPostObj_ObjStore::verify_params() { /* check that we have enough memory to store the object @@ -1252,6 +1544,51 @@ int RGWPostObj_ObjStore::verify_params() return 0; } +int RGWPostObj_ObjStore::get_params() +{ + if (s->expect_cont) { + /* OK, here it really gets ugly. With POST, the params are embedded in the + * request body, so we need to continue before being able to actually look + * at them. This diverts from the usual request flow. 
*/ + dump_continue(s); + s->expect_cont = false; + } + + std::string req_content_type_str = s->info.env->get("CONTENT_TYPE", ""); + std::string req_content_type; + std::map<std::string, std::string> params; + parse_boundary_params(req_content_type_str, req_content_type, params); + + if (req_content_type.compare("multipart/form-data") != 0) { + err_msg = "Request Content-Type is not multipart/form-data"; + return -EINVAL; + } + + if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) { + ldout(s->cct, 20) << "request content_type_str=" + << req_content_type_str << dendl; + ldout(s->cct, 20) << "request content_type params:" << dendl; + + for (const auto& pair : params) { + ldout(s->cct, 20) << " " << pair.first << " -> " << pair.second + << dendl; + } + } + + const auto iter = params.find("boundary"); + if (std::end(params) == iter) { + err_msg = "Missing multipart boundary specification"; + return -EINVAL; + } + + /* Create the boundary. */ + boundary = "--"; + boundary.append(iter->second); + + return 0; +} + + int RGWPutACLs_ObjStore::get_params() { const auto max_size = s->cct->_conf->rgw_max_put_param_size; diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h index c618f532f3c..41597a2d476 100644 --- a/src/rgw/rgw_rest.h +++ b/src/rgw/rgw_rest.h @@ -218,6 +218,67 @@ public: class RGWPostObj_ObjStore : public RGWPostObj { + std::string boundary; + +public: + struct post_part_field { + std::string val; + std::map<std::string, std::string> params; + }; + + struct post_form_part { + std::string name; + std::map<std::string, post_part_field, ltstr_nocase> fields; + ceph::bufferlist data; + }; + +protected: + using parts_collection_t = \ + std::map<std::string, post_form_part, const ltstr_nocase>; + + std::string err_msg; + ceph::bufferlist in_data; + + int read_with_boundary(ceph::bufferlist& bl, + uint64_t max, + bool check_eol, + bool& reached_boundary, + bool& done); + + int read_line(ceph::bufferlist& bl, + uint64_t max, + bool& reached_boundary, + bool& 
done); + + int read_data(ceph::bufferlist& bl, + uint64_t max, + bool& reached_boundary, + bool& done); + + int read_form_part_header(struct post_form_part *part, bool& done); + + int get_params() override; + + static int parse_part_field(const std::string& line, + std::string& field_name, /* out */ + post_part_field& field); /* out */ + + static void parse_boundary_params(const std::string& params_str, + std::string& first, + std::map<std::string, std::string>& params); + + static bool part_str(parts_collection_t& parts, + const std::string& name, + std::string *val); + + static std::string get_part_str(parts_collection_t& parts, + const std::string& name, + const std::string& def_val = std::string()); + + static bool part_bl(parts_collection_t& parts, + const std::string& name, + ceph::bufferlist *pbl); + public: RGWPostObj_ObjStore() {} ~RGWPostObj_ObjStore() override {} @@ -225,6 +286,7 @@ public: int verify_params() override; }; + class RGWPutMetadataAccount_ObjStore : public RGWPutMetadataAccount { public: @@ -564,8 +626,6 @@ public: static constexpr int64_t NO_CONTENT_LENGTH = -1; static constexpr int64_t CHUNKED_TRANSFER_ENCODING = -2; -extern void set_req_state_err(struct rgw_err &err, int err_no, int prot_flags); -extern void set_req_state_err(struct req_state *s, int err_no); extern void dump_errno(int http_ret, string& out); extern void dump_errno(const struct rgw_err &err, string& out); extern void dump_errno(struct req_state *s); diff --git a/src/rgw/rgw_rest_conn.cc b/src/rgw/rgw_rest_conn.cc index 8699624002b..71a11554d2c 100644 --- a/src/rgw/rgw_rest_conn.cc +++ b/src/rgw/rgw_rest_conn.cc @@ -19,6 +19,27 @@ RGWRESTConn::RGWRESTConn(CephContext *_cct, RGWRados *store, } } +RGWRESTConn::RGWRESTConn(RGWRESTConn&& other) + : cct(other.cct), + endpoints(std::move(other.endpoints)), + key(std::move(other.key)), + self_zone_group(std::move(other.self_zone_group)), + remote_id(std::move(other.remote_id)), + counter(other.counter.load()) +{ +} + 
+RGWRESTConn& RGWRESTConn::operator=(RGWRESTConn&& other) +{ + cct = other.cct; + endpoints = std::move(other.endpoints); + key = std::move(other.key); + self_zone_group = std::move(other.self_zone_group); + remote_id = std::move(other.remote_id); + counter = other.counter.load(); + return *this; +} + int RGWRESTConn::get_url(string& endpoint) { if (endpoints.empty()) { @@ -26,7 +47,7 @@ int RGWRESTConn::get_url(string& endpoint) return -EIO; } - int i = counter.inc(); + int i = ++counter; endpoint = endpoints[i % endpoints.size()]; return 0; @@ -40,7 +61,7 @@ string RGWRESTConn::get_url() return endpoint; } - int i = counter.inc(); + int i = ++counter; endpoint = endpoints[i % endpoints.size()]; return endpoint; @@ -182,7 +203,13 @@ int RGWRESTConn::get_obj(const rgw_user& uid, req_info *info /* optional */, rgw set_header(mod_pg_ver, extra_headers, "HTTP_DEST_PG_VER"); } - return (*req)->get_obj(key, extra_headers, obj); + int r = (*req)->get_obj(key, extra_headers, obj); + if (r < 0) { + delete *req; + *req = nullptr; + } + + return r; } int RGWRESTConn::complete_request(RGWRESTStreamRWRequest *req, string& etag, real_time *mtime, diff --git a/src/rgw/rgw_rest_conn.h b/src/rgw/rgw_rest_conn.h index 674387ffa05..bec829d6939 100644 --- a/src/rgw/rgw_rest_conn.h +++ b/src/rgw/rgw_rest_conn.h @@ -9,6 +9,7 @@ #include "common/ceph_json.h" #include "common/RefCountedObj.h" +#include <atomic> class CephContext; class RGWRados; @@ -55,11 +56,15 @@ class RGWRESTConn RGWAccessKey key; string self_zone_group; string remote_id; - atomic_t counter; + std::atomic<int64_t> counter = { 0 }; public: RGWRESTConn(CephContext *_cct, RGWRados *store, const string& _remote_id, const list<string>& endpoints); + // custom move needed for atomic + RGWRESTConn(RGWRESTConn&& other); + RGWRESTConn& operator=(RGWRESTConn&& other); + int get_url(string& endpoint); string get_url(); const string& get_self_zonegroup() { diff --git a/src/rgw/rgw_rest_log.cc b/src/rgw/rgw_rest_log.cc index 
973af3c8891..9220917c99a 100644 --- a/src/rgw/rgw_rest_log.cc +++ b/src/rgw/rgw_rest_log.cc @@ -20,6 +20,7 @@ #include "rgw_client_io.h" #include "rgw_sync.h" #include "rgw_data_sync.h" +#include "rgw_common.h" #include "common/errno.h" #include "include/assert.h" @@ -863,8 +864,7 @@ void RGWOp_MDLog_Status::execute() http_ret = -ENOENT; return; } - http_ret = sync->read_sync_status(); - status = sync->get_sync_status(); + http_ret = sync->read_sync_status(&status); } void RGWOp_MDLog_Status::send_response() diff --git a/src/rgw/rgw_rest_realm.cc b/src/rgw/rgw_rest_realm.cc index d5ef848b805..0f5abcdfe05 100644 --- a/src/rgw/rgw_rest_realm.cc +++ b/src/rgw/rgw_rest_realm.cc @@ -26,9 +26,7 @@ class RGWOp_Period_Base : public RGWRESTOp { // reply with the period object on success void RGWOp_Period_Base::send_response() { - s->err.message = error_stream.str(); - - set_req_state_err(s, http_ret); + set_req_state_err(s, http_ret, error_stream.str()); dump_errno(s); if (http_ret < 0) { diff --git a/src/rgw/rgw_rest_role.cc b/src/rgw/rgw_rest_role.cc index 386504398fe..40473fba67f 100644 --- a/src/rgw/rgw_rest_role.cc +++ b/src/rgw/rgw_rest_role.cc @@ -1,3 +1,5 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab #include <errno.h> #include "common/errno.h" @@ -15,8 +17,6 @@ #define dout_subsys ceph_subsys_rgw -using namespace std; - void RGWRestRole::send_response() { if (op_ret) { @@ -77,9 +77,7 @@ void RGWCreateRole::execute() if (op_ret < 0) { return; } - string uid; - s->user->user_id.to_str(uid); - RGWRole role(s->cct, store, role_name, role_path, trust_policy, uid); + RGWRole role(s->cct, store, role_name, role_path, trust_policy, s->user->user_id.tenant); op_ret = role.create(true); if (op_ret == -EEXIST) { @@ -111,7 +109,7 @@ void RGWDeleteRole::execute() if (op_ret < 0) { return; } - RGWRole role(s->cct, store, role_name); + RGWRole role(s->cct, store, role_name, s->user->user_id.tenant); op_ret = 
role.delete_obj(); if (op_ret == -ENOENT) { @@ -137,7 +135,7 @@ void RGWGetRole::execute() if (op_ret < 0) { return; } - RGWRole role(s->cct, store, role_name); + RGWRole role(s->cct, store, role_name, s->user->user_id.tenant); op_ret = role.get(); if (op_ret == -ENOENT) { @@ -175,7 +173,7 @@ void RGWModifyRole::execute() if (op_ret < 0) { return; } - RGWRole role(s->cct, store, role_name); + RGWRole role(s->cct, store, role_name, s->user->user_id.tenant); op_ret = role.get(); if (op_ret == -ENOENT) { op_ret = -ERR_NO_ROLE_FOUND; @@ -201,7 +199,7 @@ void RGWListRoles::execute() return; } vector<RGWRole> result; - op_ret = RGWRole::get_roles_by_path_prefix(store, s->cct, path_prefix, result); + op_ret = RGWRole::get_roles_by_path_prefix(store, s->cct, path_prefix, s->user->user_id.tenant, result); if (op_ret == 0) { s->formatter->open_array_section("Roles"); @@ -240,7 +238,7 @@ void RGWPutRolePolicy::execute() return; } - RGWRole role(s->cct, store, role_name); + RGWRole role(s->cct, store, role_name, s->user->user_id.tenant); op_ret = role.get(); if (op_ret == 0) { role.set_perm_policy(policy_name, perm_policy); @@ -267,7 +265,7 @@ void RGWGetRolePolicy::execute() return; } - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, s->user->user_id.tenant); op_ret = role.get(); if (op_ret == -ENOENT) { @@ -306,7 +304,7 @@ void RGWListRolePolicies::execute() return; } - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, s->user->user_id.tenant); op_ret = role.get(); if (op_ret == -ENOENT) { @@ -342,7 +340,7 @@ void RGWDeleteRolePolicy::execute() return; } - RGWRole role(g_ceph_context, store, role_name); + RGWRole role(g_ceph_context, store, role_name, s->user->user_id.tenant); op_ret = role.get(); if (op_ret == -ENOENT) { diff --git a/src/rgw/rgw_rest_role.h b/src/rgw/rgw_rest_role.h index de3ec97a637..7a99dbe45c6 100644 --- a/src/rgw/rgw_rest_role.h +++ b/src/rgw/rgw_rest_role.h 
@@ -1,3 +1,5 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab #ifndef CEPH_RGW_REST_ROLE_H #define CEPH_RGW_REST_ROLE_H diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 1c9e4d751b0..96285e258ff 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -65,14 +65,13 @@ void dump_bucket(struct req_state *s, RGWBucketEnt& obj) s->formatter->close_section(); } -void rgw_get_errno_s3(rgw_http_errors *e , int err_no) +void rgw_get_errno_s3(rgw_http_error *e , int err_no) { - const struct rgw_http_errors *r; - r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS)); + rgw_http_errors::const_iterator r = rgw_http_s3_errors.find(err_no); - if (r) { - e->http_ret = r->http_ret; - e->s3_code = r->s3_code; + if (r != rgw_http_s3_errors.end()) { + e->http_ret = r->second.first; + e->s3_code = r->second.second; } else { e->http_ret = 500; e->s3_code = "UnknownError"; @@ -173,7 +172,7 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, dump_errno(s, custom_http_ret); } else { set_req_state_err(s, (partial_content && !op_ret) ? 
STATUS_PARTIAL_CONTENT - : op_ret); + : op_ret); dump_errno(s); } @@ -1406,7 +1405,6 @@ static inline int get_obj_attrs(RGWRados *store, struct req_state *s, rgw_obj& o RGWRados::Object::Read read_op(&op_target); read_op.params.attrs = &attrs; - read_op.params.perr = &s->err; return read_op.prepare(); } @@ -1494,265 +1492,6 @@ int RGWPutObj_ObjStore_S3::get_encrypt_filter( } return res; } -/* - * parses params in the format: 'first; param1=foo; param2=bar' - */ -static void parse_params(const string& params_str, string& first, - map<string, string>& params) -{ - size_t pos = params_str.find(';'); - if (pos == string::npos) { - first = rgw_trim_whitespace(params_str); - return; - } - - first = rgw_trim_whitespace(params_str.substr(0, pos)); - - pos++; - - while (pos < params_str.size()) { - size_t end = params_str.find(';', pos); - if (end == string::npos) - end = params_str.size(); - - string param = params_str.substr(pos, end - pos); - - size_t eqpos = param.find('='); - if (eqpos != string::npos) { - params[rgw_trim_whitespace(param.substr(0, eqpos))] = - rgw_trim_quotes(param.substr(eqpos + 1)); - } else { - params[rgw_trim_whitespace(param)] = ""; - } - - pos = end + 1; - } -} - -static int parse_part_field(const string& line, string& field_name, - struct post_part_field& field) -{ - size_t pos = line.find(':'); - if (pos == string::npos) - return -EINVAL; - - field_name = line.substr(0, pos); - if (pos >= line.size() - 1) - return 0; - - parse_params(line.substr(pos + 1), field.val, field.params); - - return 0; -} - -bool is_crlf(const char *s) -{ - return (*s == '\r' && *(s + 1) == '\n'); -} - -/* - * find the index of the boundary, if exists, or optionally the next end of line - * also returns how many bytes to skip - */ -static int index_of(bufferlist& bl, int max_len, const string& str, - bool check_crlf, - bool *reached_boundary, int *skip) -{ - *reached_boundary = false; - *skip = 0; - - if (str.size() < 2) // we assume boundary is at least 2 chars 
(makes it easier with crlf checks) - return -EINVAL; - - if (bl.length() < str.size()) - return -1; - - const char *buf = bl.c_str(); - const char *s = str.c_str(); - - if (max_len > (int)bl.length()) - max_len = bl.length(); - - int i; - for (i = 0; i < max_len; i++, buf++) { - if (check_crlf && - i >= 1 && - is_crlf(buf - 1)) { - return i + 1; // skip the crlf - } - if ((i < max_len - (int)str.size() + 1) && - (buf[0] == s[0] && buf[1] == s[1]) && - (strncmp(buf, s, str.size()) == 0)) { - *reached_boundary = true; - *skip = str.size(); - - /* oh, great, now we need to swallow the preceding crlf - * if exists - */ - if ((i >= 2) && - is_crlf(buf - 2)) { - i -= 2; - *skip += 2; - } - return i; - } - } - - return -1; -} - -int RGWPostObj_ObjStore_S3::read_with_boundary(bufferlist& bl, uint64_t max, - bool check_crlf, - bool *reached_boundary, - bool *done) -{ - uint64_t cl = max + 2 + boundary.size(); - - if (max > in_data.length()) { - uint64_t need_to_read = cl - in_data.length(); - - bufferptr bp(need_to_read); - - const auto read_len = recv_body(s, bp.c_str(), need_to_read); - in_data.append(bp, 0, read_len); - } - - *done = false; - int skip; - int index = index_of(in_data, cl, boundary, check_crlf, reached_boundary, - &skip); - if (index >= 0) - max = index; - - if (max > in_data.length()) - max = in_data.length(); - - bl.substr_of(in_data, 0, max); - - bufferlist new_read_data; - - /* - * now we need to skip boundary for next time, also skip any crlf, or - * check to see if it's the last final boundary (marked with "--" at the end - */ - if (*reached_boundary) { - int left = in_data.length() - max; - if (left < skip + 2) { - int need = skip + 2 - left; - bufferptr boundary_bp(need); - recv_body(s, boundary_bp.c_str(), need); - in_data.append(boundary_bp); - } - max += skip; // skip boundary for next time - if (in_data.length() >= max + 2) { - const char *data = in_data.c_str(); - if (is_crlf(data + max)) { - max += 2; - } else { - if (*(data + max) == '-' && 
- *(data + max + 1) == '-') { - *done = true; - max += 2; - } - } - } - } - - new_read_data.substr_of(in_data, max, in_data.length() - max); - in_data = new_read_data; - - return 0; -} - -int RGWPostObj_ObjStore_S3::read_line(bufferlist& bl, uint64_t max, - bool *reached_boundary, bool *done) -{ - return read_with_boundary(bl, max, true, reached_boundary, done); -} - -int RGWPostObj_ObjStore_S3::read_data(bufferlist& bl, uint64_t max, - bool *reached_boundary, bool *done) -{ - return read_with_boundary(bl, max, false, reached_boundary, done); -} - - -int RGWPostObj_ObjStore_S3::read_form_part_header(struct post_form_part *part, - bool *done) -{ - bufferlist bl; - bool reached_boundary; - uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size; - int r = read_line(bl, chunk_size, &reached_boundary, done); - if (r < 0) - return r; - - if (*done) { - return 0; - } - - if (reached_boundary) { // skip the first boundary - r = read_line(bl, chunk_size, &reached_boundary, done); - if (r < 0) - return r; - if (*done) - return 0; - } - - while (true) { - /* - * iterate through fields - */ - string line = rgw_trim_whitespace(string(bl.c_str(), bl.length())); - - if (line.empty()) - break; - - struct post_part_field field; - - string field_name; - r = parse_part_field(line, field_name, field); - if (r < 0) - return r; - - part->fields[field_name] = field; - - if (stringcasecmp(field_name, "Content-Disposition") == 0) { - part->name = field.params["name"]; - } - - if (reached_boundary) - break; - - r = read_line(bl, chunk_size, &reached_boundary, done); - } - - return 0; -} - -bool RGWPostObj_ObjStore_S3::part_str(const string& name, string *val) -{ - map<string, struct post_form_part, ltstr_nocase>::iterator iter - = parts.find(name); - if (iter == parts.end()) - return false; - - bufferlist& data = iter->second.data; - string str = string(data.c_str(), data.length()); - *val = rgw_trim_whitespace(str); - return true; -} - -bool RGWPostObj_ObjStore_S3::part_bl(const string& 
name, bufferlist *pbl) -{ - map<string, struct post_form_part, ltstr_nocase>::iterator iter = - parts.find(name); - if (iter == parts.end()) - return false; - - *pbl = iter->second.data; - return true; -} void RGWPostObj_ObjStore_S3::rebuild_key(string& key) { @@ -1768,74 +1507,47 @@ void RGWPostObj_ObjStore_S3::rebuild_key(string& key) key = new_key; } -int RGWPostObj_ObjStore_S3::get_params() +std::string RGWPostObj_ObjStore_S3::get_current_filename() const { - // get the part boundary - string req_content_type_str = s->info.env->get("CONTENT_TYPE", ""); - string req_content_type; - map<string, string> params; - - if (s->expect_cont) { - /* ok, here it really gets ugly. With POST, the params are embedded in the - * request body, so we need to continue before being able to actually look - * at them. This diverts from the usual request flow. - */ - dump_continue(s); - s->expect_cont = false; - } - - parse_params(req_content_type_str, req_content_type, params); + return s->object.name; +} - if (req_content_type.compare("multipart/form-data") != 0) { - err_msg = "Request Content-Type is not multipart/form-data"; - return -EINVAL; - } +std::string RGWPostObj_ObjStore_S3::get_current_content_type() const +{ + return content_type; +} - if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) { - ldout(s->cct, 20) << "request content_type_str=" - << req_content_type_str << dendl; - ldout(s->cct, 20) << "request content_type params:" << dendl; - map<string, string>::iterator iter; - for (iter = params.begin(); iter != params.end(); ++iter) { - ldout(s->cct, 20) << " " << iter->first << " -> " << iter->second - << dendl; - } +int RGWPostObj_ObjStore_S3::get_params() +{ + op_ret = RGWPostObj_ObjStore::get_params(); + if (op_ret < 0) { + return op_ret; } ldout(s->cct, 20) << "adding bucket to policy env: " << s->bucket.name << dendl; env.add_var("bucket", s->bucket.name); - map<string, string>::iterator iter = params.find("boundary"); - if (iter == params.end()) { - 
err_msg = "Missing multipart boundary specification"; - return -EINVAL; - } - - // create the boundary - boundary = "--"; - boundary.append(iter->second); - bool done; do { struct post_form_part part; - int r = read_form_part_header(&part, &done); + int r = read_form_part_header(&part, done); if (r < 0) return r; if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) { - map<string, struct post_part_field, ltstr_nocase>::iterator piter; - for (piter = part.fields.begin(); piter != part.fields.end(); ++piter) { - ldout(s->cct, 20) << "read part header: name=" << part.name - << " content_type=" << part.content_type << dendl; - ldout(s->cct, 20) << "name=" << piter->first << dendl; - ldout(s->cct, 20) << "val=" << piter->second.val << dendl; - ldout(s->cct, 20) << "params:" << dendl; - map<string, string>& params = piter->second.params; - for (iter = params.begin(); iter != params.end(); ++iter) { - ldout(s->cct, 20) << " " << iter->first << " -> " << iter->second - << dendl; - } + ldout(s->cct, 20) << "read part header -- part.name=" + << part.name << dendl; + + for (const auto& pair : part.fields) { + ldout(s->cct, 20) << "field.name=" << pair.first << dendl; + ldout(s->cct, 20) << "field.val=" << pair.second.val << dendl; + ldout(s->cct, 20) << "field.params:" << dendl; + + for (const auto& param_pair : pair.second.params) { + ldout(s->cct, 20) << " " << param_pair.first + << " -> " << param_pair.second << dendl; + } } } @@ -1851,13 +1563,12 @@ int RGWPostObj_ObjStore_S3::get_params() filename = iter->second; } parts[part.name] = part; - data_pending = true; break; } bool boundary; uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size; - r = read_data(part.data, chunk_size, &boundary, &done); + r = read_data(part.data, chunk_size, boundary, done); if (!boundary) { err_msg = "Couldn't find boundary"; return -EINVAL; @@ -1868,7 +1579,7 @@ int RGWPostObj_ObjStore_S3::get_params() } while (!done); string object_str; - if (!part_str("key", &object_str)) { + if 
(!part_str(parts, "key", &object_str)) { err_msg = "Key not specified"; return -EINVAL; } @@ -1884,7 +1595,7 @@ int RGWPostObj_ObjStore_S3::get_params() env.add_var("key", s->object.name); - part_str("Content-Type", &content_type); + part_str(parts, "Content-Type", &content_type); env.add_var("Content-Type", content_type); map<string, struct post_form_part, ltstr_nocase>::iterator piter = @@ -1935,16 +1646,16 @@ int RGWPostObj_ObjStore_S3::get_params() int RGWPostObj_ObjStore_S3::get_policy() { - if (part_bl("policy", &s->auth.s3_postobj_creds.encoded_policy)) { - + if (part_bl(parts, "policy", &s->auth.s3_postobj_creds.encoded_policy)) { // check that the signature matches the encoded policy - if (! part_str("AWSAccessKeyId", &s->auth.s3_postobj_creds.access_key)) { + if (!part_str(parts, "AWSAccessKeyId", + &s->auth.s3_postobj_creds.access_key)) { ldout(s->cct, 0) << "No S3 access key found!" << dendl; err_msg = "Missing access key"; return -EINVAL; } - string received_signature_str; - if (! part_str("signature", &s->auth.s3_postobj_creds.signature)) { + + if (!part_str(parts, "signature", &s->auth.s3_postobj_creds.signature)) { ldout(s->cct, 0) << "No signature found!" 
<< dendl; err_msg = "Missing signature"; return -EINVAL; @@ -2020,7 +1731,7 @@ int RGWPostObj_ObjStore_S3::get_policy() } string canned_acl; - part_str("acl", &canned_acl); + part_str(parts, "acl", &canned_acl); RGWAccessControlPolicy_S3 s3policy(s->cct); ldout(s->cct, 20) << "canned_acl=" << canned_acl << dendl; @@ -2039,44 +1750,47 @@ int RGWPostObj_ObjStore_S3::complete_get_params() bool done; do { struct post_form_part part; - int r = read_form_part_header(&part, &done); - if (r < 0) + int r = read_form_part_header(&part, done); + if (r < 0) { return r; + } - bufferlist part_data; + ceph::bufferlist part_data; bool boundary; uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size; - r = read_data(part.data, chunk_size, &boundary, &done); + r = read_data(part.data, chunk_size, boundary, done); if (!boundary) { return -EINVAL; } - parts[part.name] = part; + /* Just reading the data but not storing any results of that. */ } while (!done); return 0; } -int RGWPostObj_ObjStore_S3::get_data(bufferlist& bl) +int RGWPostObj_ObjStore_S3::get_data(ceph::bufferlist& bl, bool& again) { bool boundary; bool done; - uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size; - int r = read_data(bl, chunk_size, &boundary, &done); - if (r < 0) + const uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size; + int r = read_data(bl, chunk_size, boundary, done); + if (r < 0) { return r; + } if (boundary) { - data_pending = false; - - if (!done) { /* reached end of data, let's drain the rest of the params */ + if (!done) { + /* Reached end of data, let's drain the rest of the params */ r = complete_get_params(); - if (r < 0) - return r; + if (r < 0) { + return r; + } } } + again = !boundary; return bl.length(); } @@ -2085,7 +1799,7 @@ void RGWPostObj_ObjStore_S3::send_response() if (op_ret == 0 && parts.count("success_action_redirect")) { string redirect; - part_str("success_action_redirect", &redirect); + part_str(parts, "success_action_redirect", &redirect); string tenant; string 
bucket; @@ -2135,7 +1849,7 @@ void RGWPostObj_ObjStore_S3::send_response() string status_string; uint32_t status_int; - part_str("success_action_status", &status_string); + part_str(parts, "success_action_status", &status_string); int r = stringtoul(status_string, &status_int); if (r < 0) { @@ -2938,7 +2652,7 @@ void RGWDeleteMultiObj_ObjStore_S3::send_partial_response(rgw_obj_key& key, } s->formatter->close_section(); } else if (op_ret < 0) { - struct rgw_http_errors r; + struct rgw_http_error r; int err_no; s->formatter->open_object_section("Error"); @@ -3073,6 +2787,8 @@ RGWOp *RGWHandler_REST_Bucket_S3::op_get() return new RGWListBucketMultiparts_ObjStore_S3; } else if(is_lc_op()) { return new RGWGetLC_ObjStore_S3; + } else if(is_policy_op()) { + return new RGWGetBucketPolicy; } return get_obj_op(true); } @@ -3107,6 +2823,8 @@ RGWOp *RGWHandler_REST_Bucket_S3::op_put() return new RGWSetRequestPayment_ObjStore_S3; } else if(is_lc_op()) { return new RGWPutLC_ObjStore_S3; + } else if(is_policy_op()) { + return new RGWPutBucketPolicy; } return new RGWCreateBucket_ObjStore_S3; } @@ -3117,6 +2835,8 @@ RGWOp *RGWHandler_REST_Bucket_S3::op_delete() return new RGWDeleteCORS_ObjStore_S3; } else if(is_lc_op()) { return new RGWDeleteLC_ObjStore_S3; + } else if(is_policy_op()) { + return new RGWDeleteBucketPolicy; } if (s->info.args.sub_resource_exists("website")) { @@ -3936,7 +3656,8 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s, bool force_b case RGW_OP_DELETE_MULTI_OBJ: case RGW_OP_ADMIN_SET_METADATA: case RGW_OP_SET_BUCKET_WEBSITE: - break; + case RGW_OP_PUT_BUCKET_POLICY: + break; default: dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED" << dendl; return -ERR_NOT_IMPLEMENTED; @@ -4253,11 +3974,11 @@ int RGWHandler_REST_S3Website::serve_errordoc(int http_ret, const string& errord int RGWHandler_REST_S3Website::error_handler(int err_no, string* error_content) { int new_err_no = -1; - const struct rgw_http_errors* r; + 
rgw_http_errors::const_iterator r = rgw_http_s3_errors.find(err_no > 0 ? err_no : -err_no); int http_error_code = -1; - r = search_err(err_no > 0 ? err_no : -err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS)); - if (r) { - http_error_code = r->http_ret; + + if (r != rgw_http_s3_errors.end()) { + http_error_code = r->second.first; } ldout(s->cct, 10) << "RGWHandler_REST_S3Website::error_handler err_no=" << err_no << " http_ret=" << http_error_code << dendl; diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index 291b04b9281..8dcf242b283 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -27,7 +27,12 @@ #define RGW_AUTH_GRACE_MINS 15 -void rgw_get_errno_s3(struct rgw_http_errors *e, int err_no); +struct rgw_http_error { + int http_ret; + const char *s3_code; +}; + +void rgw_get_errno_s3(struct rgw_http_error *e, int err_no); class RGWGetObj_ObjStore_S3 : public RGWGetObj_ObjStore { @@ -195,46 +200,22 @@ public: bufferlist* manifest_bl) override; }; -struct post_part_field { - string val; - map<string, string> params; -}; - -struct post_form_part { - string name; - string content_type; - map<string, struct post_part_field, ltstr_nocase> fields; - bufferlist data; -}; - class RGWPostObj_ObjStore_S3 : public RGWPostObj_ObjStore { - string boundary; - string filename; - bufferlist in_data; - map<string, post_form_part, const ltstr_nocase> parts; + parts_collection_t parts; + std::string filename; + std::string content_type; RGWPolicyEnv env; RGWPolicy post_policy; - string err_msg; map<string, string> crypt_http_responses; const rgw::auth::StrategyRegistry* auth_registry_ptr = nullptr; - int read_with_boundary(bufferlist& bl, uint64_t max, bool check_eol, - bool *reached_boundary, - bool *done); - - int read_line(bufferlist& bl, uint64_t max, - bool *reached_boundary, bool *done); - - int read_data(bufferlist& bl, uint64_t max, bool *reached_boundary, bool *done); - - int read_form_part_header(struct post_form_part *part, - bool *done); - bool 
part_str(const string& name, string *val); - bool part_bl(const string& name, bufferlist *pbl); - int get_policy(); void rebuild_key(string& key); + + std::string get_current_filename() const override; + std::string get_current_content_type() const override; + public: RGWPostObj_ObjStore_S3() {} ~RGWPostObj_ObjStore_S3() override {} @@ -248,7 +229,7 @@ public: int complete_get_params(); void send_response() override; - int get_data(bufferlist& bl) override; + int get_data(ceph::bufferlist& bl, bool& again) override; int get_encrypt_filter(std::unique_ptr<RGWPutObjDataProcessor>* filter, RGWPutObjDataProcessor* cb) override; }; @@ -530,6 +511,9 @@ protected: bool is_request_payment_op() { return s->info.args.exists("requestPayment"); } + bool is_policy_op() { + return s->info.args.exists("policy"); + } RGWOp *get_obj_op(bool get_data); RGWOp *op_get() override; diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index 3d4a4d5bda8..6cca4f2efc2 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -306,6 +306,8 @@ void RGWListBucket_ObjStore_SWIFT::send_response() s->formatter->dump_string("name", key.name); s->formatter->dump_string("hash", iter->meta.etag); s->formatter->dump_int("bytes", iter->meta.accounted_size); + if (!iter->meta.user_data.empty()) + s->formatter->dump_string("user_custom_data", iter->meta.user_data); string single_content_type = iter->meta.content_type; if (iter->meta.content_type.size()) { // content type might hold multiple values, just dump the last one @@ -502,6 +504,7 @@ static int get_swift_container_settings(req_state * const s, RGWRados * const store, RGWAccessControlPolicy * const policy, bool * const has_policy, + uint32_t * rw_mask, RGWCORSConfiguration * const cors_config, bool * const has_cors) { @@ -524,7 +527,8 @@ static int get_swift_container_settings(req_state * const s, s->user->user_id, s->user->display_name, read_list, - write_list); + write_list, + *rw_mask); if (r < 0) { return r; } @@ 
-622,8 +626,10 @@ static int get_swift_versioning_settings( int RGWCreateBucket_ObjStore_SWIFT::get_params() { bool has_policy; + uint32_t policy_rw_mask = 0; - int r = get_swift_container_settings(s, store, &policy, &has_policy, &cors_config, &has_cors); + int r = get_swift_container_settings(s, store, &policy, &has_policy, + &policy_rw_mask, &cors_config, &has_cors); if (r < 0) { return r; } @@ -758,6 +764,13 @@ int RGWPutObj_ObjStore_SWIFT::get_params() return r; } + if (!s->cct->_conf->rgw_swift_custom_header.empty()) { + string custom_header = s->cct->_conf->rgw_swift_custom_header; + if (s->info.env->exists(custom_header.c_str())) { + user_data = s->info.env->get(custom_header.c_str()); + } + } + dlo_manifest = s->info.env->get("HTTP_X_OBJECT_MANIFEST"); bool exists; string multipart_manifest = s->info.args.get("multipart-manifest", &exists); @@ -895,7 +908,7 @@ int RGWPutMetadataBucket_ObjStore_SWIFT::get_params() } int r = get_swift_container_settings(s, store, &policy, &has_policy, - &cors_config, &has_cors); + &policy_rw_mask, &cors_config, &has_cors); if (r < 0) { return r; } @@ -943,7 +956,7 @@ void RGWPutMetadataObject_ObjStore_SWIFT::send_response() op_ret = STATUS_ACCEPTED; } set_req_state_err(s, op_ret); - if (!s->err.is_err()) { + if (!s->is_err()) { dump_content_length(s, 0); } dump_errno(s); @@ -969,7 +982,6 @@ static void bulkdelete_respond(const unsigned num_deleted, reason = fail_desc.err; } } - rgw_err err; set_req_state_err(err, reason, prot_flags); dump_errno(err, resp_status); @@ -1283,7 +1295,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, : op_ret); dump_errno(s); - if (s->err.is_err()) { + if (s->is_err()) { end_header(s, NULL); return 0; } @@ -1293,7 +1305,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, dump_range(s, ofs, end, s->obj_size); } - if (s->err.is_err()) { + if (s->is_err()) { end_header(s, NULL); return 0; } @@ -1713,6 +1725,278 @@ bool RGWInfo_ObjStore_SWIFT::is_expired(const 
std::string& expires, CephContext* return false; } + +void RGWFormPost::init(RGWRados* const store, + req_state* const s, + RGWHandler* const dialect_handler) +{ + prefix = std::move(s->object.name); + s->object = rgw_obj_key(); + + return RGWPostObj_ObjStore::init(store, s, dialect_handler); +} + +std::size_t RGWFormPost::get_max_file_size() /*const*/ +{ + std::string max_str = get_part_str(ctrl_parts, "max_file_size", "0"); + + std::string err; + const std::size_t max_file_size = + static_cast<uint64_t>(strict_strtoll(max_str.c_str(), 10, &err)); + + if (! err.empty()) { + ldout(s->cct, 5) << "failed to parse FormPost's max_file_size: " << err + << dendl; + return 0; + } + + return max_file_size; +} + +bool RGWFormPost::is_non_expired() +{ + std::string expires = get_part_str(ctrl_parts, "expires", "0"); + + std::string err; + const uint64_t expires_timestamp = + static_cast<uint64_t>(strict_strtoll(expires.c_str(), 10, &err)); + + if (! err.empty()) { + dout(5) << "failed to parse FormPost's expires: " << err << dendl; + return false; + } + + const utime_t now = ceph_clock_now(); + if (expires_timestamp <= static_cast<uint64_t>(now.sec())) { + dout(5) << "FormPost form expired: " + << expires_timestamp << " <= " << now.sec() << dendl; + return false; + } + + return true; +} + +bool RGWFormPost::is_integral() +{ + const std::string form_signature = get_part_str(ctrl_parts, "signature"); + + for (const auto& kv : s->user->temp_url_keys) { + const int temp_url_key_num = kv.first; + const string& temp_url_key = kv.second; + + if (temp_url_key.empty()) { + continue; + } + + SignatureHelper sig_helper; + sig_helper.calc(temp_url_key, + s->info.request_uri, + get_part_str(ctrl_parts, "redirect"), + get_part_str(ctrl_parts, "max_file_size", "0"), + get_part_str(ctrl_parts, "max_file_count", "0"), + get_part_str(ctrl_parts, "expires", "0")); + + const auto local_sig = sig_helper.get_signature(); + + ldout(s->cct, 20) << "FormPost signature [" << temp_url_key_num << "]" + 
<< " (calculated): " << local_sig << dendl; + + if (sig_helper.is_equal_to(form_signature)) { + return true; + } else { + ldout(s->cct, 5) << "FormPost's signature mismatch: " + << local_sig << " != " << form_signature << dendl; + } + } + + return false; +} + +int RGWFormPost::get_params() +{ + /* The parentt class extracts boundary info from the Content-Type. */ + int ret = RGWPostObj_ObjStore::get_params(); + if (ret < 0) { + return ret; + } + + policy.create_default(s->user->user_id, s->user->display_name); + + /* Let's start parsing the HTTP body by parsing each form part step- + * by-step till encountering the first part with file data. */ + do { + struct post_form_part part; + ret = read_form_part_header(&part, stream_done); + if (ret < 0) { + return ret; + } + + if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) { + ldout(s->cct, 20) << "read part header -- part.name=" + << part.name << dendl; + + for (const auto& pair : part.fields) { + ldout(s->cct, 20) << "field.name=" << pair.first << dendl; + ldout(s->cct, 20) << "field.val=" << pair.second.val << dendl; + ldout(s->cct, 20) << "field.params:" << dendl; + + for (const auto& param_pair : pair.second.params) { + ldout(s->cct, 20) << " " << param_pair.first + << " -> " << param_pair.second << dendl; + } + } + } + + if (stream_done) { + /* Unexpected here. */ + err_msg = "Malformed request"; + return -EINVAL; + } + + const auto field_iter = part.fields.find("Content-Disposition"); + if (std::end(part.fields) != field_iter && + std::end(field_iter->second.params) != field_iter->second.params.find("filename")) { + /* First data part ahead. */ + current_data_part = std::move(part); + + /* Stop the iteration. We can assume that all control parts have been + * already parsed. The rest of HTTP body should contain data parts + * only. They will be picked up by ::get_data(). */ + break; + } else { + /* Control part ahead. Receive, parse and store for later usage. 
*/ + bool boundary; + ret = read_data(part.data, s->cct->_conf->rgw_max_chunk_size, + boundary, stream_done); + if (ret < 0) { + return ret; + } else if (! boundary) { + err_msg = "Couldn't find boundary"; + return -EINVAL; + } + + ctrl_parts[part.name] = std::move(part); + } + } while (! stream_done); + + min_len = 0; + max_len = get_max_file_size(); + + if (! current_data_part) { + err_msg = "FormPost: no files to process"; + return -EINVAL; + } + + if (! is_non_expired()) { + err_msg = "FormPost: Form Expired"; + return -EPERM; + } + + if (! is_integral()) { + err_msg = "FormPost: Invalid Signature"; + return -EPERM; + } + + return 0; +} + +std::string RGWFormPost::get_current_filename() const +{ + try { + const auto& field = current_data_part->fields.at("Content-Disposition"); + const auto iter = field.params.find("filename"); + + if (std::end(field.params) != iter) { + return prefix + iter->second; + } + } catch (std::out_of_range&) { + /* NOP */; + } + + return prefix; +} + +std::string RGWFormPost::get_current_content_type() const +{ + try { + const auto& field = current_data_part->fields.at("Content-Type"); + return field.val; + } catch (std::out_of_range&) { + /* NOP */; + } + + return std::string(); +} + +bool RGWFormPost::is_next_file_to_upload() +{ + if (! stream_done) { + /* We have at least one additional part in the body. 
*/ + struct post_form_part part; + int r = read_form_part_header(&part, stream_done); + if (r < 0) { + return false; + } + + const auto field_iter = part.fields.find("Content-Disposition"); + if (std::end(part.fields) != field_iter) { + const auto& params = field_iter->second.params; + + if (std::end(params) != params.find("filename")) { + current_data_part = std::move(part); + return true; + } + } + } + + return false; +} + +int RGWFormPost::get_data(ceph::bufferlist& bl, bool& again) +{ + bool boundary; + + int r = read_data(bl, s->cct->_conf->rgw_max_chunk_size, + boundary, stream_done); + if (r < 0) { + return r; + } + + /* Tell RGWPostObj::execute() that it has some data to put. */ + again = !boundary; + + return bl.length(); +} + +void RGWFormPost::send_response() +{ + std::string redirect = get_part_str(ctrl_parts, "redirect"); + if (! redirect.empty()) { + op_ret = STATUS_REDIRECT; + } + + set_req_state_err(s, op_ret); + s->err.s3_code = err_msg; + dump_errno(s); + if (! redirect.empty()) { + dump_redirect(s, redirect); + } + end_header(s, this); +} + +bool RGWFormPost::is_formpost_req(req_state* const s) +{ + std::string content_type; + std::map<std::string, std::string> params; + + parse_boundary_params(s->info.env->get("CONTENT_TYPE", ""), + content_type, params); + + return boost::algorithm::iequals(content_type, "multipart/form-data") && + params.count("boundary") > 0; +} + + RGWOp *RGWHandler_REST_Service_SWIFT::op_get() { return new RGWListBuckets_ObjStore_SWIFT; @@ -1788,9 +2072,8 @@ int RGWSwiftWebsiteHandler::error_handler(const int err_no, const auto& ws_conf = s->bucket_info.website_conf; if (can_be_website_req() && ! ws_conf.error_doc.empty()) { - struct rgw_err err; - set_req_state_err(err, err_no, s->prot_flags); - return serve_errordoc(err.http_ret, ws_conf.error_doc); + set_req_state_err(s, err_no); + return serve_errordoc(s->err.http_ret, ws_conf.error_doc); } /* Let's go to the default, no-op handler. 
*/ @@ -2103,6 +2386,9 @@ RGWOp *RGWHandler_REST_Bucket_SWIFT::op_put() if (is_acl_op()) { return new RGWPutACLs_ObjStore_SWIFT; } + if(s->info.args.exists("extract-archive")) { + return new RGWBulkUploadOp_ObjStore_SWIFT; + } return new RGWCreateBucket_ObjStore_SWIFT; } @@ -2113,7 +2399,11 @@ RGWOp *RGWHandler_REST_Bucket_SWIFT::op_delete() RGWOp *RGWHandler_REST_Bucket_SWIFT::op_post() { - return new RGWPutMetadataBucket_ObjStore_SWIFT; + if (RGWFormPost::is_formpost_req(s)) { + return new RGWFormPost; + } else { + return new RGWPutMetadataBucket_ObjStore_SWIFT; + } } RGWOp *RGWHandler_REST_Bucket_SWIFT::op_options() @@ -2148,6 +2438,9 @@ RGWOp *RGWHandler_REST_Obj_SWIFT::op_put() if (is_acl_op()) { return new RGWPutACLs_ObjStore_SWIFT; } + if(s->info.args.exists("extract-archive")) { + return new RGWBulkUploadOp_ObjStore_SWIFT; + } if (s->init_state.src_bucket.empty()) return new RGWPutObj_ObjStore_SWIFT; else @@ -2161,7 +2454,11 @@ RGWOp *RGWHandler_REST_Obj_SWIFT::op_delete() RGWOp *RGWHandler_REST_Obj_SWIFT::op_post() { - return new RGWPutMetadataObject_ObjStore_SWIFT; + if (RGWFormPost::is_formpost_req(s)) { + return new RGWFormPost; + } else { + return new RGWPutMetadataObject_ObjStore_SWIFT; + } } RGWOp *RGWHandler_REST_Obj_SWIFT::op_copy() diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h index 8478b1bad7b..0b1293e286f 100644 --- a/src/rgw/rgw_rest_swift.h +++ b/src/rgw/rgw_rest_swift.h @@ -11,6 +11,9 @@ #include "rgw_op.h" #include "rgw_rest.h" #include "rgw_swift_auth.h" +#include "rgw_http_errors.h" + +#include <boost/utility/string_ref.hpp> class RGWGetObj_ObjStore_SWIFT : public RGWGetObj_ObjStore { int custom_http_ret = 0; @@ -238,6 +241,94 @@ public: }; +class RGWFormPost : public RGWPostObj_ObjStore { + std::string get_current_filename() const override; + std::string get_current_content_type() const override; + std::size_t get_max_file_size() /*const*/; + bool is_next_file_to_upload() override; + bool is_integral(); + bool 
is_non_expired(); + + parts_collection_t ctrl_parts; + boost::optional<post_form_part> current_data_part; + std::string prefix; + bool stream_done = false; + + class SignatureHelper; +public: + RGWFormPost() = default; + ~RGWFormPost() = default; + + void init(RGWRados* store, + req_state* s, + RGWHandler* dialect_handler) override; + + int get_params() override; + int get_data(ceph::bufferlist& bl, bool& again) override; + void send_response() override; + + static bool is_formpost_req(req_state* const s); +}; + +class RGWFormPost::SignatureHelper +{ +private: + static constexpr uint32_t output_size = + CEPH_CRYPTO_HMACSHA1_DIGESTSIZE * 2 + 1; + + unsigned char dest[CEPH_CRYPTO_HMACSHA1_DIGESTSIZE]; // 20 + char dest_str[output_size]; + +public: + SignatureHelper() = default; + + const char* calc(const std::string& key, + const boost::string_ref& path_info, + const boost::string_ref& redirect, + const boost::string_ref& max_file_size, + const boost::string_ref& max_file_count, + const boost::string_ref& expires) { + using ceph::crypto::HMACSHA1; + using UCHARPTR = const unsigned char*; + + HMACSHA1 hmac((UCHARPTR) key.data(), key.size()); + + hmac.Update((UCHARPTR) path_info.data(), path_info.size()); + hmac.Update((UCHARPTR) "\n", 1); + + hmac.Update((UCHARPTR) redirect.data(), redirect.size()); + hmac.Update((UCHARPTR) "\n", 1); + + hmac.Update((UCHARPTR) max_file_size.data(), max_file_size.size()); + hmac.Update((UCHARPTR) "\n", 1); + + hmac.Update((UCHARPTR) max_file_count.data(), max_file_count.size()); + hmac.Update((UCHARPTR) "\n", 1); + + hmac.Update((UCHARPTR) expires.data(), expires.size()); + + hmac.Final(dest); + + buf_to_hex((UCHARPTR) dest, sizeof(dest), dest_str); + + return dest_str; + } + + const char* get_signature() const { + return dest_str; + } + + bool is_equal_to(const std::string& rhs) const { + /* never allow out-of-range exception */ + if (rhs.size() < (output_size - 1)) { + return false; + } + return rhs.compare(0 /* pos */, output_size, 
dest_str) == 0; + } + +}; /* RGWFormPost::SignatureHelper */ + + class RGWSwiftWebsiteHandler { RGWRados* const store; req_state* const s; diff --git a/src/rgw/rgw_role.cc b/src/rgw/rgw_role.cc index 9ccc20e70e3..236fe6e6aa4 100644 --- a/src/rgw/rgw_role.cc +++ b/src/rgw/rgw_role.cc @@ -1,5 +1,6 @@ #include <errno.h> #include <ctime> +#include <regex> #include "common/errno.h" #include "common/Formatter.h" @@ -38,7 +39,7 @@ int RGWRole::store_name(bool exclusive) RGWNameToId nameToId; nameToId.obj_id = id; - string oid = get_names_oid_prefix() + name; + string oid = tenant + get_names_oid_prefix() + name; bufferlist bl; ::encode(nameToId, bl); @@ -48,7 +49,7 @@ int RGWRole::store_name(bool exclusive) int RGWRole::store_path(bool exclusive) { - string oid = get_path_oid_prefix() + path + get_info_oid_prefix() + id; + string oid = tenant + get_path_oid_prefix() + path + get_info_oid_prefix() + id; return rgw_put_system_obj(store, store->get_zone_params().roles_pool, oid, NULL, 0, exclusive, NULL, real_time(), NULL); @@ -58,8 +59,12 @@ int RGWRole::create(bool exclusive) { int ret; + if (! 
validate_input()) { + return -EINVAL; + } + /* check to see the name is not used */ - ret = read_id(name, id); + ret = read_id(name, tenant, id); if (exclusive && ret == 0) { ldout(cct, 0) << "ERROR: name " << name << " already in use for role id " << id << dendl; @@ -78,7 +83,7 @@ int RGWRole::create(bool exclusive) id = uuid_str; //arn - arn = role_arn_prefix + uid + ":role" + path + name; + arn = role_arn_prefix + tenant + ":role" + path + name; // Creation time real_clock::time_point t = real_clock::now(); @@ -128,7 +133,7 @@ int RGWRole::create(bool exclusive) << id << ": " << cpp_strerror(-info_ret) << dendl; } //Delete role name that was stored in previous call - oid = get_names_oid_prefix() + name; + oid = tenant + get_names_oid_prefix() + name; int name_ret = rgw_delete_system_obj(store, pool, oid, NULL); if (name_ret < 0) { ldout(cct, 0) << "ERROR: cleanup of role name from pool: " << pool.name << ": " @@ -166,7 +171,7 @@ int RGWRole::delete_obj() } // Delete name - oid = get_names_oid_prefix() + name; + oid = tenant + get_names_oid_prefix() + name; ret = rgw_delete_system_obj(store, pool, oid, NULL); if (ret < 0) { ldout(cct, 0) << "ERROR: deleting role name from pool: " << pool.name << ": " @@ -174,7 +179,7 @@ int RGWRole::delete_obj() } // Delete path - oid = get_path_oid_prefix() + path + get_info_oid_prefix() + id; + oid = tenant + get_path_oid_prefix() + path + get_info_oid_prefix() + id; ret = rgw_delete_system_obj(store, pool, oid, NULL); if (ret < 0) { ldout(cct, 0) << "ERROR: deleting role path from pool: " << pool.name << ": " @@ -198,6 +203,16 @@ int RGWRole::get() return 0; } +int RGWRole::get_by_id() +{ + int ret = read_info(); + if (ret < 0) { + return ret; + } + + return 0; +} + int RGWRole::update() { auto& pool = store->get_zone_params().roles_pool; @@ -272,10 +287,10 @@ void RGWRole::decode_json(JSONObj *obj) JSONDecoder::decode_json("assume_role_policy_document", trust_policy, obj); } -int RGWRole::read_id(const string& role_name, 
string& role_id) +int RGWRole::read_id(const string& role_name, const string& tenant, string& role_id) { auto& pool = store->get_zone_params().roles_pool; - string oid = get_names_oid_prefix() + role_name; + string oid = tenant + get_names_oid_prefix() + role_name; bufferlist bl; RGWObjectCtx obj_ctx(store); @@ -326,7 +341,7 @@ int RGWRole::read_info() int RGWRole::read_name() { auto& pool = store->get_zone_params().roles_pool; - string oid = get_names_oid_prefix() + name; + string oid = tenant + get_names_oid_prefix() + name; bufferlist bl; RGWObjectCtx obj_ctx(store); @@ -350,21 +365,61 @@ int RGWRole::read_name() return 0; } +bool RGWRole::validate_input() +{ + if (name.length() > MAX_ROLE_NAME_LEN) { + ldout(cct, 0) << "ERROR: Invalid name length " << dendl; + return false; + } + + if (path.length() > MAX_PATH_NAME_LEN) { + ldout(cct, 0) << "ERROR: Invalid path length " << dendl; + return false; + } + + std::regex regex_name("[A-Za-z0-9:=,.@-]+"); + if (! std::regex_match(name, regex_name)) { + ldout(cct, 0) << "ERROR: Invalid chars in name " << dendl; + return false; + } + + std::regex regex_path("(/[!-~]+/)|(/)"); + if (! std::regex_match(path,regex_path)) { + ldout(cct, 0) << "ERROR: Invalid chars in path " << dendl; + return false; + } + + return true; +} + +void RGWRole::extract_name_tenant(const std::string& str) +{ + size_t pos = str.find('$'); + if (pos != std::string::npos) { + tenant = str.substr(0, pos); + name = str.substr(pos + 1); + } +} + void RGWRole::update_trust_policy(string& trust_policy) { this->trust_policy = trust_policy; } -int RGWRole::get_roles_by_path_prefix(RGWRados *store, CephContext *cct, const string& path_prefix, vector<RGWRole>& roles) +int RGWRole::get_roles_by_path_prefix(RGWRados *store, + CephContext *cct, + const string& path_prefix, + const string& tenant, + vector<RGWRole>& roles) { auto pool = store->get_zone_params().roles_pool; string prefix; // List all roles if path prefix is empty if (! 
path_prefix.empty()) { - prefix = role_path_oid_prefix + path_prefix; + prefix = tenant + role_path_oid_prefix + path_prefix; } else { - prefix = role_path_oid_prefix; + prefix = tenant + role_path_oid_prefix; } //Get the filtered objects diff --git a/src/rgw/rgw_role.h b/src/rgw/rgw_role.h index f8a60a4c568..bccb576ed2d 100644 --- a/src/rgw/rgw_role.h +++ b/src/rgw/rgw_role.h @@ -7,6 +7,8 @@ class RGWRole static const string role_oid_prefix; static const string role_path_oid_prefix; static const string role_arn_prefix; + static constexpr int MAX_ROLE_NAME_LEN = 64; + static constexpr int MAX_PATH_NAME_LEN = 512; CephContext *cct; RGWRados *store; @@ -17,15 +19,17 @@ class RGWRole string creation_date; string trust_policy; map<string, string> perm_policy_map; - string uid; + string tenant; int store_info(bool exclusive); int store_name(bool exclusive); int store_path(bool exclusive); - int read_id(const string& role_name, string& role_id); + int read_id(const string& role_name, const string& tenant, string& role_id); int read_name(); int read_info(); void set_id(const string& id) { this->id = id; } + bool validate_input(); + void extract_name_tenant(const std::string& str); public: RGWRole(CephContext *cct, @@ -33,23 +37,35 @@ public: string name, string path, string trust_policy, - string uid) + string tenant) : cct(cct), store(store), name(std::move(name)), path(std::move(path)), trust_policy(std::move(trust_policy)), - uid(std::move(uid)) { + tenant(std::move(tenant)) { if (this->path.empty()) this->path = "/"; + extract_name_tenant(this->name); } RGWRole(CephContext *cct, RGWRados *store, - string name) + string name, + string tenant) + : cct(cct), + store(store), + name(std::move(name)), + tenant(std::move(tenant)) { + extract_name_tenant(this->name); + } + + RGWRole(CephContext *cct, + RGWRados *store, + string id) : cct(cct), store(store), - name(std::move(name)) {} + id(std::move(id)) {} RGWRole(CephContext *cct, RGWRados *store) @@ -61,7 +77,7 @@ public: 
~RGWRole() = default; void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); ::encode(id, bl); ::encode(name, bl); ::encode(path, bl); @@ -69,11 +85,12 @@ public: ::encode(creation_date, bl); ::encode(trust_policy, bl); ::encode(perm_policy_map, bl); + ::encode(tenant, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); + DECODE_START(2, bl); ::decode(id, bl); ::decode(name, bl); ::decode(path, bl); @@ -81,6 +98,9 @@ public: ::decode(creation_date, bl); ::decode(trust_policy, bl); ::decode(perm_policy_map, bl); + if (struct_v >= 2) { + ::decode(tenant, bl); + } DECODE_FINISH(bl); } @@ -88,10 +108,12 @@ public: const string& get_name() const { return name; } const string& get_path() const { return path; } const string& get_create_date() const { return creation_date; } + const string& get_assume_role_policy() const { return trust_policy;} int create(bool exclusive); int delete_obj(); int get(); + int get_by_id(); int update(); void update_trust_policy(string& trust_policy); void set_perm_policy(const string& policy_name, const string& perm_policy); @@ -104,7 +126,11 @@ public: static const string& get_names_oid_prefix(); static const string& get_info_oid_prefix(); static const string& get_path_oid_prefix(); - static int get_roles_by_path_prefix(RGWRados *store, CephContext *cct, const string& path_prefix, vector<RGWRole>& roles); + static int get_roles_by_path_prefix(RGWRados *store, + CephContext *cct, + const string& path_prefix, + const string& tenant, + vector<RGWRole>& roles); }; WRITE_CLASS_ENCODER(RGWRole) #endif /* CEPH_RGW_ROLE_H */ diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index be1fa2fdd88..9d8d250132b 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -52,7 +52,7 @@ RGWCoroutine *RGWSyncErrorLogger::log_error_cr(const string& source_zone, const ::encode(info, bl); store->time_log_prepare_entry(entry, real_clock::now(), section, name, bl); - uint32_t shard_id = 
counter.inc() % num_shards; + uint32_t shard_id = ++counter % num_shards; return new RGWRadosTimelogAddCR(store, oids[shard_id], entry); @@ -83,8 +83,8 @@ void RGWSyncBackoff::backoff(RGWCoroutine *op) } int RGWBackoffControlCR::operate() { - RGWCoroutine *finisher_cr; reenter(this) { + // retry the operation until it succeeds while (true) { yield { Mutex::Locker l(lock); @@ -97,7 +97,10 @@ int RGWBackoffControlCR::operate() { cr->put(); cr = NULL; } - if (retcode < 0 && retcode != -EBUSY && retcode != -EAGAIN) { + if (retcode >= 0) { + break; + } + if (retcode != -EBUSY && retcode != -EAGAIN) { ldout(cct, 0) << "ERROR: RGWBackoffControlCR called coroutine returned " << retcode << dendl; if (exit_on_error) { return set_cr_error(retcode); @@ -107,17 +110,15 @@ int RGWBackoffControlCR::operate() { backoff.reset(); } yield backoff.backoff(this); - finisher_cr = alloc_finisher_cr(); - if (finisher_cr) { - yield call(finisher_cr); - if (retcode < 0) { - ldout(cct, 0) << "ERROR: call to finisher_cr() failed: retcode=" << retcode << dendl; - if (exit_on_error) { - return set_cr_error(retcode); - } - } - } } + + // run an optional finisher + yield call(alloc_finisher_cr()); + if (retcode < 0) { + ldout(cct, 0) << "ERROR: call to finisher_cr() failed: retcode=" << retcode << dendl; + return set_cr_error(retcode); + } + return set_cr_done(); } return 0; } @@ -289,7 +290,7 @@ int RGWRemoteMetaLog::init() void RGWRemoteMetaLog::finish() { - going_down.set(1); + going_down = true; stop(); } @@ -320,13 +321,14 @@ int RGWMetaSyncStatusManager::init() RGWMetaSyncEnv& sync_env = master_log.get_sync_env(); - r = read_sync_status(); + rgw_meta_sync_status sync_status; + r = read_sync_status(&sync_status); if (r < 0 && r != -ENOENT) { lderr(store->ctx()) << "ERROR: failed to read sync status, r=" << r << dendl; return r; } - int num_shards = master_log.get_sync_status().sync_info.num_shards; + int num_shards = sync_status.sync_info.num_shards; for (int i = 0; i < num_shards; i++) { 
shard_objs[i] = rgw_raw_obj(store->get_zone_params().log_pool, sync_env.shard_obj_name(i)); @@ -691,36 +693,75 @@ public: } }; -class RGWReadSyncStatusCoroutine : public RGWSimpleRadosReadCR<rgw_meta_sync_info> { - RGWMetaSyncEnv *sync_env; +class RGWReadSyncStatusMarkersCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; - rgw_meta_sync_status *sync_status; + RGWMetaSyncEnv *env; + const int num_shards; + int shard_id{0}; + map<uint32_t, rgw_meta_sync_marker>& markers; -public: - RGWReadSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, - rgw_meta_sync_status *_status) : RGWSimpleRadosReadCR(_sync_env->async_rados, _sync_env->store, - rgw_raw_obj(_sync_env->store->get_zone_params().log_pool, _sync_env->status_oid()), - &_status->sync_info), - sync_env(_sync_env), - sync_status(_status) { + public: + RGWReadSyncStatusMarkersCR(RGWMetaSyncEnv *env, int num_shards, + map<uint32_t, rgw_meta_sync_marker>& markers) + : RGWShardCollectCR(env->cct, MAX_CONCURRENT_SHARDS), + env(env), num_shards(num_shards), markers(markers) + {} + bool spawn_next() override; +}; +bool RGWReadSyncStatusMarkersCR::spawn_next() +{ + if (shard_id >= num_shards) { + return false; } + using CR = RGWSimpleRadosReadCR<rgw_meta_sync_marker>; + rgw_raw_obj obj{env->store->get_zone_params().log_pool, + env->shard_obj_name(shard_id)}; + spawn(new CR(env->async_rados, env->store, obj, &markers[shard_id]), false); + shard_id++; + return true; +} + +class RGWReadSyncStatusCoroutine : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + rgw_meta_sync_status *sync_status; - int handle_data(rgw_meta_sync_info& data) override; +public: + RGWReadSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, + rgw_meta_sync_status *_status) + : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), sync_status(_status) + {} + int operate() override; }; -int RGWReadSyncStatusCoroutine::handle_data(rgw_meta_sync_info& data) +int RGWReadSyncStatusCoroutine::operate() { - if (retcode == -ENOENT) { - return 
0; - } - - RGWRados *store = sync_env->store; - map<uint32_t, rgw_meta_sync_marker>& markers = sync_status->sync_markers; - for (int i = 0; i < (int)data.num_shards; i++) { - spawn(new RGWSimpleRadosReadCR<rgw_meta_sync_marker>(sync_env->async_rados, store, - rgw_raw_obj(store->get_zone_params().log_pool, sync_env->shard_obj_name(i)), - &markers[i]), true); + reenter(this) { + // read sync info + using ReadInfoCR = RGWSimpleRadosReadCR<rgw_meta_sync_info>; + yield { + bool empty_on_enoent = false; // fail on ENOENT + rgw_raw_obj obj{sync_env->store->get_zone_params().log_pool, + sync_env->status_oid()}; + call(new ReadInfoCR(sync_env->async_rados, sync_env->store, obj, + &sync_status->sync_info, empty_on_enoent)); + } + if (retcode < 0) { + ldout(sync_env->cct, 4) << "failed to read sync status info with " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + // read shard markers + using ReadMarkersCR = RGWReadSyncStatusMarkersCR; + yield call(new ReadMarkersCR(sync_env, sync_status->sync_info.num_shards, + sync_status->sync_markers)); + if (retcode < 0) { + ldout(sync_env->cct, 4) << "failed to read sync status markers with " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + return set_cr_done(); } return 0; } @@ -1103,7 +1144,7 @@ public: sync_marker.timestamp = timestamp; } - ldout(sync_env->cct, 20) << __func__ << "(): updating marker marker_oid=" << marker_oid << " marker=" << new_marker << dendl; + ldout(sync_env->cct, 20) << __func__ << "(): updating marker marker_oid=" << marker_oid << " marker=" << new_marker << " realm_epoch=" << sync_marker.realm_epoch << dendl; RGWRados *store = sync_env->store; return new RGWSimpleRadosWriteCR<rgw_meta_sync_marker>(sync_env->async_rados, store, @@ -1244,6 +1285,7 @@ class RGWMetaSyncShardCR : public RGWCoroutine { const rgw_pool& pool; const std::string& period; //< currently syncing period id + const epoch_t realm_epoch; //< realm_epoch of period RGWMetadataLog* mdlog; 
//< log of syncing period uint32_t shard_id; rgw_meta_sync_marker& sync_marker; @@ -1291,11 +1333,13 @@ class RGWMetaSyncShardCR : public RGWCoroutine { public: RGWMetaSyncShardCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool, - const std::string& period, RGWMetadataLog* mdlog, - uint32_t _shard_id, rgw_meta_sync_marker& _marker, + const std::string& period, epoch_t realm_epoch, + RGWMetadataLog* mdlog, uint32_t _shard_id, + rgw_meta_sync_marker& _marker, const std::string& period_marker, bool *_reset_backoff) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), pool(_pool), - period(period), mdlog(mdlog), shard_id(_shard_id), sync_marker(_marker), + period(period), realm_epoch(realm_epoch), mdlog(mdlog), + shard_id(_shard_id), sync_marker(_marker), period_marker(period_marker), inc_lock("RGWMetaSyncShardCR::inc_lock"), reset_backoff(_reset_backoff) { *reset_backoff = false; @@ -1478,7 +1522,8 @@ public: temp_marker->state = rgw_meta_sync_marker::IncrementalSync; temp_marker->marker = std::move(temp_marker->next_step_marker); temp_marker->next_step_marker.clear(); - ldout(sync_env->cct, 0) << *this << ": saving marker pos=" << temp_marker->marker << dendl; + temp_marker->realm_epoch = realm_epoch; + ldout(sync_env->cct, 0) << *this << ": saving marker pos=" << temp_marker->marker << " realm_epoch=" << realm_epoch << dendl; using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_meta_sync_marker>; yield call(new WriteMarkerCR(sync_env->async_rados, sync_env->store, @@ -1549,6 +1594,14 @@ public: yield; } } + // if the period has advanced, we can't use the existing marker + if (sync_marker.realm_epoch < realm_epoch) { + ldout(sync_env->cct, 0) << "clearing marker=" << sync_marker.marker + << " from old realm_epoch=" << sync_marker.realm_epoch + << " (now " << realm_epoch << ')' << dendl; + sync_marker.realm_epoch = realm_epoch; + sync_marker.marker.clear(); + } mdlog_marker = sync_marker.marker; set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env, 
sync_env->shard_obj_name(shard_id), @@ -1570,7 +1623,8 @@ public: } #define INCREMENTAL_MAX_ENTRIES 100 ldout(sync_env->cct, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl; - if (!period_marker.empty() && period_marker <= marker) { + if (!period_marker.empty() && period_marker <= mdlog_marker) { + ldout(cct, 10) << "mdlog_marker past period_marker=" << period_marker << dendl; done_with_period = true; break; } @@ -1602,10 +1656,16 @@ public: *reset_backoff = false; // back off and try again later return retcode; } - for (log_iter = log_entries.begin(); log_iter != log_entries.end(); ++log_iter) { - if (!period_marker.empty() && period_marker < log_iter->id) { + for (log_iter = log_entries.begin(); log_iter != log_entries.end() && !done_with_period; ++log_iter) { + if (!period_marker.empty() && period_marker <= log_iter->id) { done_with_period = true; - break; + if (period_marker < log_iter->id) { + ldout(cct, 10) << "found key=" << log_iter->id + << " past period_marker=" << period_marker << dendl; + break; + } + ldout(cct, 10) << "found key at period_marker=" << period_marker << dendl; + // sync this entry, then return control to RGWMetaSyncCR } if (!mdlog_entry.convert_from(*log_iter)) { ldout(sync_env->cct, 0) << __func__ << ":" << __LINE__ << ": ERROR: failed to convert mdlog entry, shard_id=" << shard_id << " log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp << " ... 
skipping entry" << dendl; @@ -1631,6 +1691,7 @@ public: ldout(sync_env->cct, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " max_marker=" << max_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl; if (done_with_period) { // return control to RGWMetaSyncCR and advance to the next period + ldout(sync_env->cct, 10) << *this << ": done with period" << dendl; break; } if (mdlog_marker == max_marker && can_adjust_marker) { @@ -1655,6 +1716,8 @@ public: if (!can_adjust_marker) { return -EAGAIN; } + + return set_cr_done(); } /* TODO */ return 0; @@ -1667,6 +1730,7 @@ class RGWMetaSyncShardControlCR : public RGWBackoffControlCR const rgw_pool& pool; const std::string& period; + epoch_t realm_epoch; RGWMetadataLog* mdlog; uint32_t shard_id; rgw_meta_sync_marker sync_marker; @@ -1675,16 +1739,18 @@ class RGWMetaSyncShardControlCR : public RGWBackoffControlCR static constexpr bool exit_on_error = false; // retry on all errors public: RGWMetaSyncShardControlCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool, - const std::string& period, RGWMetadataLog* mdlog, - uint32_t _shard_id, const rgw_meta_sync_marker& _marker, + const std::string& period, epoch_t realm_epoch, + RGWMetadataLog* mdlog, uint32_t _shard_id, + const rgw_meta_sync_marker& _marker, std::string&& period_marker) : RGWBackoffControlCR(_sync_env->cct, exit_on_error), sync_env(_sync_env), - pool(_pool), period(period), mdlog(mdlog), shard_id(_shard_id), - sync_marker(_marker), period_marker(std::move(period_marker)) {} + pool(_pool), period(period), realm_epoch(realm_epoch), mdlog(mdlog), + shard_id(_shard_id), sync_marker(_marker), + period_marker(std::move(period_marker)) {} RGWCoroutine *alloc_cr() override { - return new RGWMetaSyncShardCR(sync_env, pool, period, mdlog, shard_id, - sync_marker, period_marker, backoff_ptr()); + return new RGWMetaSyncShardCR(sync_env, pool, period, realm_epoch, mdlog, + 
shard_id, sync_marker, period_marker, backoff_ptr()); } RGWCoroutine *alloc_finisher_cr() override { @@ -1743,6 +1809,7 @@ public: yield { // get the mdlog for the current period (may be empty) auto& period_id = sync_status.sync_info.period; + auto realm_epoch = sync_status.sync_info.realm_epoch; auto mdlog = sync_env->store->meta_mgr->get_log(period_id); // prevent wakeup() from accessing shard_crs while we're spawning them @@ -1765,9 +1832,10 @@ public: } } - auto cr = new RGWMetaSyncShardControlCR(sync_env, pool, period_id, - mdlog, shard_id, marker, - std::move(period_marker)); + using ShardCR = RGWMetaSyncShardControlCR; + auto cr = new ShardCR(sync_env, pool, period_id, realm_epoch, + mdlog, shard_id, marker, + std::move(period_marker)); auto stack = spawn(cr, false); shard_crs[shard_id] = RefPair{cr, stack}; } @@ -1821,13 +1889,24 @@ void RGWRemoteMetaLog::init_sync_env(RGWMetaSyncEnv *env) { env->error_logger = error_logger; } -int RGWRemoteMetaLog::read_sync_status() +int RGWRemoteMetaLog::read_sync_status(rgw_meta_sync_status *sync_status) { if (store->is_meta_master()) { return 0; } - - return run(new RGWReadSyncStatusCoroutine(&sync_env, &sync_status)); + // cannot run concurrently with run_sync(), so run in a separate manager + RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry()); + RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr()); + int ret = http_manager.set_threaded(); + if (ret < 0) { + ldout(store->ctx(), 0) << "failed in http_manager.set_threaded() ret=" << ret << dendl; + return ret; + } + RGWMetaSyncEnv sync_env_local = sync_env; + sync_env_local.http_manager = &http_manager; + ret = crs.run(new RGWReadSyncStatusCoroutine(&sync_env_local, sync_status)); + http_manager.stop(); + return ret; } int RGWRemoteMetaLog::init_sync_status() @@ -1836,30 +1915,29 @@ int RGWRemoteMetaLog::init_sync_status() return 0; } - auto& sync_info = sync_status.sync_info; - if (!sync_info.num_shards) { - rgw_mdlog_info mdlog_info; - int 
r = read_log_info(&mdlog_info); - if (r < 0) { - lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl; - return r; - } - sync_info.num_shards = mdlog_info.num_shards; - auto cursor = store->period_history->get_current(); - if (cursor) { - sync_info.period = cursor.get_period().get_id(); - sync_info.realm_epoch = cursor.get_epoch(); - } + rgw_mdlog_info mdlog_info; + int r = read_log_info(&mdlog_info); + if (r < 0) { + lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl; + return r; + } + + rgw_meta_sync_info sync_info; + sync_info.num_shards = mdlog_info.num_shards; + auto cursor = store->period_history->get_current(); + if (cursor) { + sync_info.period = cursor.get_period().get_id(); + sync_info.realm_epoch = cursor.get_epoch(); } return run(new RGWInitSyncStatusCoroutine(&sync_env, sync_info)); } -int RGWRemoteMetaLog::store_sync_info() +int RGWRemoteMetaLog::store_sync_info(const rgw_meta_sync_info& sync_info) { return run(new RGWSimpleRadosWriteCR<rgw_meta_sync_info>(async_rados, store, rgw_raw_obj(store->get_zone_params().log_pool, sync_env.status_oid()), - sync_status.sync_info)); + sync_info)); } // return a cursor to the period at our sync position @@ -1914,7 +1992,7 @@ int RGWRemoteMetaLog::run_sync() // get shard count and oldest log period from master rgw_mdlog_info mdlog_info; for (;;) { - if (going_down.read()) { + if (going_down) { ldout(store->ctx(), 1) << __func__ << "(): going down" << dendl; return 0; } @@ -1933,8 +2011,9 @@ int RGWRemoteMetaLog::run_sync() break; } + rgw_meta_sync_status sync_status; do { - if (going_down.read()) { + if (going_down) { ldout(store->ctx(), 1) << __func__ << "(): going down" << dendl; return 0; } @@ -2008,7 +2087,7 @@ int RGWRemoteMetaLog::run_sync() } sync_status.sync_info.state = rgw_meta_sync_info::StateSync; - r = store_sync_info(); + r = store_sync_info(sync_status.sync_info); if (r < 0) { ldout(store->ctx(), 0) << "ERROR: failed to update 
sync status" << dendl; return r; @@ -2033,7 +2112,7 @@ int RGWRemoteMetaLog::run_sync() ldout(store->ctx(), 0) << "ERROR: bad sync state!" << dendl; return -EIO; } - } while (!going_down.read()); + } while (!going_down); return 0; } @@ -2234,3 +2313,690 @@ int RGWCloneMetaLogCoroutine::state_store_mdlog_entries_complete() } +// TODO: move into rgw_sync_trim.cc +#undef dout_prefix +#define dout_prefix (*_dout << "meta trim: ") + +/// purge all log shards for the given mdlog +class PurgeLogShardsCR : public RGWShardCollectCR { + RGWRados *const store; + const RGWMetadataLog* mdlog; + const int num_shards; + rgw_raw_obj obj; + int i{0}; + + static constexpr int max_concurrent = 16; + + public: + PurgeLogShardsCR(RGWRados *store, const RGWMetadataLog* mdlog, + const rgw_pool& pool, int num_shards) + : RGWShardCollectCR(store->ctx(), max_concurrent), + store(store), mdlog(mdlog), num_shards(num_shards), obj(pool, "") + {} + + bool spawn_next() override { + if (i == num_shards) { + return false; + } + mdlog->get_shard_oid(i++, obj.oid); + spawn(new RGWRadosRemoveCR(store, obj), false); + return true; + } +}; + +using Cursor = RGWPeriodHistory::Cursor; + +/// purge mdlogs from the oldest up to (but not including) the given realm_epoch +class PurgePeriodLogsCR : public RGWCoroutine { + RGWRados *const store; + RGWMetadataManager *const metadata; + RGWObjVersionTracker objv; + Cursor cursor; + epoch_t realm_epoch; + epoch_t *last_trim_epoch; //< update last trim on success + + public: + PurgePeriodLogsCR(RGWRados *store, epoch_t realm_epoch, epoch_t *last_trim) + : RGWCoroutine(store->ctx()), store(store), metadata(store->meta_mgr), + realm_epoch(realm_epoch), last_trim_epoch(last_trim) + {} + + int operate(); +}; + +int PurgePeriodLogsCR::operate() +{ + reenter(this) { + // read our current oldest log period + yield call(metadata->read_oldest_log_period_cr(&cursor, &objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + assert(cursor); + ldout(cct, 20) << 
"oldest log realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + + // trim -up to- the given realm_epoch + while (cursor.get_epoch() < realm_epoch) { + ldout(cct, 4) << "purging log shards for realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + yield { + const auto mdlog = metadata->get_log(cursor.get_period().get_id()); + const auto& pool = store->get_zone_params().log_pool; + auto num_shards = cct->_conf->rgw_md_log_max_shards; + call(new PurgeLogShardsCR(store, mdlog, pool, num_shards)); + } + if (retcode < 0) { + ldout(cct, 1) << "failed to remove log shards: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + ldout(cct, 10) << "removed log shards for realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + + // update our mdlog history + yield call(metadata->trim_log_period_cr(cursor, &objv)); + if (retcode == -ENOENT) { + // must have raced to update mdlog history. 
return success and allow the + // winner to continue purging + ldout(cct, 10) << "already removed log shards for realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + return set_cr_done(); + } else if (retcode < 0) { + ldout(cct, 1) << "failed to remove log shards for realm_epoch=" + << cursor.get_epoch() << " period=" << cursor.get_period().get_id() + << " with: " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + if (*last_trim_epoch < cursor.get_epoch()) { + *last_trim_epoch = cursor.get_epoch(); + } + + assert(cursor.has_next()); // get_current() should always come after + cursor.next(); + } + return set_cr_done(); + } + return 0; +} + +namespace { + +using connection_map = std::map<std::string, std::unique_ptr<RGWRESTConn>>; + +/// construct a RGWRESTConn for each zone in the realm +template <typename Zonegroups> +connection_map make_peer_connections(RGWRados *store, + const Zonegroups& zonegroups) +{ + connection_map connections; + for (auto& g : zonegroups) { + for (auto& z : g.second.zones) { + std::unique_ptr<RGWRESTConn> conn{ + new RGWRESTConn(store->ctx(), store, z.first, z.second.endpoints)}; + connections.emplace(z.first, std::move(conn)); + } + } + return connections; +} + +/// return the marker that it's safe to trim up to +const std::string& get_stable_marker(const rgw_meta_sync_marker& m) +{ + return m.state == m.FullSync ? 
m.next_step_marker : m.marker; +} + +/// comparison operator for take_min_status() +bool operator<(const rgw_meta_sync_marker& lhs, const rgw_meta_sync_marker& rhs) +{ + // sort by stable marker + return get_stable_marker(lhs) < get_stable_marker(rhs); +} + +/// populate the status with the minimum stable marker of each shard for any +/// peer whose realm_epoch matches the minimum realm_epoch in the input +template <typename Iter> +int take_min_status(CephContext *cct, Iter first, Iter last, + rgw_meta_sync_status *status) +{ + if (first == last) { + return -EINVAL; + } + const size_t num_shards = cct->_conf->rgw_md_log_max_shards; + + status->sync_info.realm_epoch = std::numeric_limits<epoch_t>::max(); + for (auto p = first; p != last; ++p) { + // validate peer's shard count + if (p->sync_markers.size() != num_shards) { + ldout(cct, 1) << "take_min_status got peer status with " + << p->sync_markers.size() << " shards, expected " + << num_shards << dendl; + return -EINVAL; + } + if (p->sync_info.realm_epoch < status->sync_info.realm_epoch) { + // earlier epoch, take its entire status + *status = std::move(*p); + } else if (p->sync_info.realm_epoch == status->sync_info.realm_epoch) { + // same epoch, take any earlier markers + auto m = status->sync_markers.begin(); + for (auto& shard : p->sync_markers) { + if (shard.second < m->second) { + m->second = std::move(shard.second); + } + ++m; + } + } + } + return 0; +} + +struct TrimEnv { + RGWRados *const store; + RGWHTTPManager *const http; + int num_shards; + const std::string& zone; + Cursor current; //< cursor to current period + epoch_t last_trim_epoch{0}; //< epoch of last mdlog that was purged + + TrimEnv(RGWRados *store, RGWHTTPManager *http, int num_shards) + : store(store), http(http), num_shards(num_shards), + zone(store->get_zone_params().get_id()), + current(store->period_history->get_current()) + {} +}; + +struct MasterTrimEnv : public TrimEnv { + connection_map connections; //< peer connections + 
std::vector<rgw_meta_sync_status> peer_status; //< sync status for each peer + /// last trim marker for each shard, only applies to current period's mdlog + std::vector<std::string> last_trim_markers; + + MasterTrimEnv(RGWRados *store, RGWHTTPManager *http, int num_shards) + : TrimEnv(store, http, num_shards), + last_trim_markers(num_shards) + { + auto& period = current.get_period(); + connections = make_peer_connections(store, period.get_map().zonegroups); + connections.erase(zone); + peer_status.resize(connections.size()); + } +}; + +struct PeerTrimEnv : public TrimEnv { + /// last trim timestamp for each shard, only applies to current period's mdlog + std::vector<ceph::real_time> last_trim_timestamps; + + PeerTrimEnv(RGWRados *store, RGWHTTPManager *http, int num_shards) + : TrimEnv(store, http, num_shards), + last_trim_timestamps(num_shards) + {} + + void set_num_shards(int num_shards) { + this->num_shards = num_shards; + last_trim_timestamps.resize(num_shards); + } +}; + +} // anonymous namespace + + +/// spawn a trim cr for each shard that needs it, while limiting the number +/// of concurrent shards +class MetaMasterTrimShardCollectCR : public RGWShardCollectCR { + private: + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + MasterTrimEnv& env; + RGWMetadataLog *mdlog; + int shard_id{0}; + std::string oid; + const rgw_meta_sync_status& sync_status; + + public: + MetaMasterTrimShardCollectCR(MasterTrimEnv& env, RGWMetadataLog *mdlog, + const rgw_meta_sync_status& sync_status) + : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), + env(env), mdlog(mdlog), sync_status(sync_status) + {} + + bool spawn_next() override; +}; + +bool MetaMasterTrimShardCollectCR::spawn_next() +{ + while (shard_id < env.num_shards) { + auto m = sync_status.sync_markers.find(shard_id); + if (m == sync_status.sync_markers.end()) { + shard_id++; + continue; + } + auto& stable = get_stable_marker(m->second); + auto& last_trim = env.last_trim_markers[shard_id]; + + if (stable 
<= last_trim) { + // already trimmed + ldout(cct, 20) << "skipping log shard " << shard_id + << " at marker=" << stable + << " last_trim=" << last_trim + << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl; + shard_id++; + continue; + } + + mdlog->get_shard_oid(shard_id, oid); + + ldout(cct, 10) << "trimming log shard " << shard_id + << " at marker=" << stable + << " last_trim=" << last_trim + << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl; + spawn(new RGWSyncLogTrimCR(env.store, oid, stable, &last_trim), false); + shard_id++; + return true; + } + return false; +} + +/// spawn rest requests to read each peer's sync status +class MetaMasterStatusCollectCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + MasterTrimEnv& env; + connection_map::iterator c; + std::vector<rgw_meta_sync_status>::iterator s; + public: + MetaMasterStatusCollectCR(MasterTrimEnv& env) + : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), + env(env), c(env.connections.begin()), s(env.peer_status.begin()) + {} + + bool spawn_next() override { + if (c == env.connections.end()) { + return false; + } + static rgw_http_param_pair params[] = { + { "type", "metadata" }, + { "status", nullptr }, + { nullptr, nullptr } + }; + + ldout(cct, 20) << "query sync status from " << c->first << dendl; + auto conn = c->second.get(); + using StatusCR = RGWReadRESTResourceCR<rgw_meta_sync_status>; + spawn(new StatusCR(cct, conn, env.http, "/admin/log/", params, &*s), + false); + ++c; + ++s; + return true; + } +}; + +class MetaMasterTrimCR : public RGWCoroutine { + MasterTrimEnv& env; + rgw_meta_sync_status min_status; //< minimum sync status of all peers + int ret{0}; //< result of take_min_status(); not set by call() + + public: + MetaMasterTrimCR(MasterTrimEnv& env) + : RGWCoroutine(env.store->ctx()), env(env) + {} + + int operate() override; +}; + +int MetaMasterTrimCR::operate() +{ + reenter(this) { + // TODO: detect this and fail before we spawn the trim thread? 
+ if (env.connections.empty()) { + ldout(cct, 4) << "no peers, exiting" << dendl; + return set_cr_done(); + } + + ldout(cct, 10) << "fetching sync status for zone " << env.zone << dendl; + // query mdlog sync status from peers + yield call(new MetaMasterStatusCollectCR(env)); + + // must get a successful reply from all peers to consider trimming; the called cr's result arrives in retcode (the ret member is still 0 here) + if (retcode < 0) { + ldout(cct, 4) << "failed to fetch sync status from all peers" << dendl; + return set_cr_error(retcode); + } + + // determine the minimum epoch and markers across env.peer_status + ret = take_min_status(env.store->ctx(), env.peer_status.begin(), + env.peer_status.end(), &min_status); + if (ret < 0) { + ldout(cct, 4) << "failed to calculate min sync status from peers" << dendl; + return set_cr_error(ret); + } + yield { + auto store = env.store; + auto epoch = min_status.sync_info.realm_epoch; + ldout(cct, 4) << "realm epoch min=" << epoch + << " current=" << env.current.get_epoch()<< dendl; + if (epoch > env.last_trim_epoch + 1) { + // delete any prior mdlog periods + spawn(new PurgePeriodLogsCR(store, epoch, &env.last_trim_epoch), true); + } else { + ldout(cct, 10) << "mdlogs already purged up to realm_epoch " + << env.last_trim_epoch << dendl; + } + + // if realm_epoch == current, trim mdlog based on markers + if (epoch == env.current.get_epoch()) { + auto mdlog = store->meta_mgr->get_log(env.current.get_period().get_id()); + spawn(new MetaMasterTrimShardCollectCR(env, mdlog, min_status), true); + } + } + // ignore any errors during purge/trim because we want to hold the lock open + return set_cr_done(); + } + return 0; +} + + +/// read the first entry of the master's mdlog shard and trim to that position +class MetaPeerTrimShardCR : public RGWCoroutine { + RGWMetaSyncEnv& env; + RGWMetadataLog *mdlog; + const std::string& period_id; + const int shard_id; + RGWMetadataLogInfo info; + ceph::real_time stable; //< safe timestamp to trim, according to master + ceph::real_time *last_trim; //< last trimmed timestamp, updated on trim + 
rgw_mdlog_shard_data result; //< result from master's mdlog listing + + public: + MetaPeerTrimShardCR(RGWMetaSyncEnv& env, RGWMetadataLog *mdlog, + const std::string& period_id, int shard_id, + ceph::real_time *last_trim) + : RGWCoroutine(env.store->ctx()), env(env), mdlog(mdlog), + period_id(period_id), shard_id(shard_id), last_trim(last_trim) + {} + + int operate() override; +}; + +int MetaPeerTrimShardCR::operate() +{ + reenter(this) { + // query master's first mdlog entry for this shard + yield call(new RGWListRemoteMDLogShardCR(&env, period_id, shard_id, + "", 1, &result)); + if (retcode < 0) { + ldout(cct, 5) << "failed to read first entry from master's mdlog shard " + << shard_id << " for period " << period_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + if (result.entries.empty()) { + // if there are no mdlog entries, we don't have a timestamp to compare. we + // can't just trim everything, because there could be racing updates since + // this empty reply. 
query the mdlog shard info to read its max timestamp, + // then retry the listing to make sure it's still empty before trimming to + // that + ldout(cct, 10) << "empty master mdlog shard " << shard_id + << ", reading last timestamp from shard info" << dendl; + // read the mdlog shard info for the last timestamp + using ShardInfoCR = RGWReadRemoteMDLogShardInfoCR; + yield call(new ShardInfoCR(&env, period_id, shard_id, &info)); + if (retcode < 0) { + ldout(cct, 5) << "failed to read info from master's mdlog shard " + << shard_id << " for period " << period_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + if (ceph::real_clock::is_zero(info.last_update)) { + return set_cr_done(); // nothing to trim + } + ldout(cct, 10) << "got mdlog shard info with last update=" + << info.last_update << dendl; + // re-read the master's first mdlog entry to make sure it hasn't changed + yield call(new RGWListRemoteMDLogShardCR(&env, period_id, shard_id, + "", 1, &result)); + if (retcode < 0) { + ldout(cct, 5) << "failed to read first entry from master's mdlog shard " + << shard_id << " for period " << period_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + // if the mdlog is still empty, trim to max marker + if (result.entries.empty()) { + stable = info.last_update; + } else { + stable = result.entries.front().timestamp; + + // can only trim -up to- master's first timestamp, so subtract a second. 
+ // (this is why we use timestamps instead of markers for the peers) + stable -= std::chrono::seconds(1); + } + } else { + stable = result.entries.front().timestamp; + stable -= std::chrono::seconds(1); + } + + if (stable <= *last_trim) { + ldout(cct, 10) << "skipping log shard " << shard_id + << " at timestamp=" << stable + << " last_trim=" << *last_trim << dendl; + return set_cr_done(); + } + + ldout(cct, 10) << "trimming log shard " << shard_id + << " at timestamp=" << stable + << " last_trim=" << *last_trim << dendl; + yield { + std::string oid; + mdlog->get_shard_oid(shard_id, oid); + call(new RGWRadosTimelogTrimCR(env.store, oid, real_time{}, stable, "", "")); + } + if (retcode < 0 && retcode != -ENODATA) { + ldout(cct, 1) << "failed to trim mdlog shard " << shard_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + *last_trim = stable; + return set_cr_done(); + } + return 0; +} + +class MetaPeerTrimShardCollectCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + PeerTrimEnv& env; + RGWMetadataLog *mdlog; + const std::string& period_id; + RGWMetaSyncEnv meta_env; //< for RGWListRemoteMDLogShardCR + int shard_id{0}; + + public: + MetaPeerTrimShardCollectCR(PeerTrimEnv& env, RGWMetadataLog *mdlog) + : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), + env(env), mdlog(mdlog), period_id(env.current.get_period().get_id()) + { + meta_env.init(cct, env.store, env.store->rest_master_conn, + env.store->get_async_rados(), env.http, nullptr); + } + + bool spawn_next() override; +}; + +bool MetaPeerTrimShardCollectCR::spawn_next() +{ + if (shard_id >= env.num_shards) { + return false; + } + auto& last_trim = env.last_trim_timestamps[shard_id]; + spawn(new MetaPeerTrimShardCR(meta_env, mdlog, period_id, shard_id, &last_trim), + false); + shard_id++; + return true; +} + +class MetaPeerTrimCR : public RGWCoroutine { + PeerTrimEnv& env; + rgw_mdlog_info mdlog_info; //< master's mdlog info + + 
public: + MetaPeerTrimCR(PeerTrimEnv& env) : RGWCoroutine(env.store->ctx()), env(env) {} + + int operate(); +}; + +int MetaPeerTrimCR::operate() +{ + reenter(this) { + ldout(cct, 10) << "fetching master mdlog info" << dendl; + yield { + // query mdlog_info from master for oldest_log_period + rgw_http_param_pair params[] = { + { "type", "metadata" }, + { nullptr, nullptr } + }; + + using LogInfoCR = RGWReadRESTResourceCR<rgw_mdlog_info>; + call(new LogInfoCR(cct, env.store->rest_master_conn, env.http, + "/admin/log/", params, &mdlog_info)); + } + if (retcode < 0) { + ldout(cct, 4) << "failed to read mdlog info from master" << dendl; + return set_cr_error(retcode); + } + // use master's shard count instead + env.set_num_shards(mdlog_info.num_shards); + + if (mdlog_info.realm_epoch > env.last_trim_epoch + 1) { + // delete any prior mdlog periods + yield call(new PurgePeriodLogsCR(env.store, mdlog_info.realm_epoch, + &env.last_trim_epoch)); + } else { + ldout(cct, 10) << "mdlogs already purged through realm_epoch " + << env.last_trim_epoch << dendl; + } + + // if realm_epoch == current, trim mdlog based on master's markers + if (mdlog_info.realm_epoch == env.current.get_epoch()) { + yield { + auto meta_mgr = env.store->meta_mgr; + auto mdlog = meta_mgr->get_log(env.current.get_period().get_id()); + call(new MetaPeerTrimShardCollectCR(env, mdlog)); + // ignore any errors during purge/trim because we want to hold the lock open + } + } + return set_cr_done(); + } + return 0; +} + +class MetaTrimPollCR : public RGWCoroutine { + RGWRados *const store; + const utime_t interval; //< polling interval + const rgw_raw_obj obj; + const std::string name{"meta_trim"}; //< lock name + const std::string cookie; + + protected: + /// allocate the coroutine to run within the lease + virtual RGWCoroutine* alloc_cr() = 0; + + public: + MetaTrimPollCR(RGWRados *store, utime_t interval) + : RGWCoroutine(store->ctx()), store(store), interval(interval), + 
obj(store->get_zone_params().log_pool, RGWMetadataLogHistory::oid), + cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)) + {} + + int operate(); +}; + +int MetaTrimPollCR::operate() +{ + reenter(this) { + for (;;) { + set_status("sleeping"); + wait(interval); + + // prevent others from trimming for our entire wait interval + set_status("acquiring trim lock"); + yield call(new RGWSimpleRadosLockCR(store->get_async_rados(), store, + obj, name, cookie, interval.sec())); + if (retcode < 0) { + ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl; + continue; + } + + set_status("trimming"); + yield call(alloc_cr()); + + if (retcode < 0) { + // on errors, unlock so other gateways can try + set_status("unlocking"); + yield call(new RGWSimpleRadosUnlockCR(store->get_async_rados(), store, + obj, name, cookie)); + } + } + } + return 0; +} + +class MetaMasterTrimPollCR : public MetaTrimPollCR { + MasterTrimEnv env; //< trim state to share between calls + RGWCoroutine* alloc_cr() override { + return new MetaMasterTrimCR(env); + } + public: + MetaMasterTrimPollCR(RGWRados *store, RGWHTTPManager *http, + int num_shards, utime_t interval) + : MetaTrimPollCR(store, interval), + env(store, http, num_shards) + {} +}; + +class MetaPeerTrimPollCR : public MetaTrimPollCR { + PeerTrimEnv env; //< trim state to share between calls + RGWCoroutine* alloc_cr() override { + return new MetaPeerTrimCR(env); + } + public: + MetaPeerTrimPollCR(RGWRados *store, RGWHTTPManager *http, + int num_shards, utime_t interval) + : MetaTrimPollCR(store, interval), + env(store, http, num_shards) + {} +}; + +RGWCoroutine* create_meta_log_trim_cr(RGWRados *store, RGWHTTPManager *http, + int num_shards, utime_t interval) +{ + if (store->is_meta_master()) { + return new MetaMasterTrimPollCR(store, http, num_shards, interval); + } + return new MetaPeerTrimPollCR(store, http, num_shards, interval); +} + + +struct MetaMasterAdminTrimCR : private MasterTrimEnv, public MetaMasterTrimCR { + 
MetaMasterAdminTrimCR(RGWRados *store, RGWHTTPManager *http, int num_shards) + : MasterTrimEnv(store, http, num_shards), + MetaMasterTrimCR(*static_cast<MasterTrimEnv*>(this)) + {} +}; + +struct MetaPeerAdminTrimCR : private PeerTrimEnv, public MetaPeerTrimCR { + MetaPeerAdminTrimCR(RGWRados *store, RGWHTTPManager *http, int num_shards) + : PeerTrimEnv(store, http, num_shards), + MetaPeerTrimCR(*static_cast<PeerTrimEnv*>(this)) + {} +}; + +RGWCoroutine* create_admin_meta_log_trim_cr(RGWRados *store, + RGWHTTPManager *http, + int num_shards) +{ + if (store->is_meta_master()) { + return new MetaMasterAdminTrimCR(store, http, num_shards); + } + return new MetaPeerAdminTrimCR(store, http, num_shards); +} diff --git a/src/rgw/rgw_sync.h b/src/rgw/rgw_sync.h index 878c5d8e18b..c651f7a9ad1 100644 --- a/src/rgw/rgw_sync.h +++ b/src/rgw/rgw_sync.h @@ -8,6 +8,8 @@ #include "include/stringify.h" #include "common/RWLock.h" +#include <atomic> + #define ERROR_LOGGER_SHARDS 32 #define RGW_SYNC_ERROR_LOG_SHARD_PREFIX "sync.error-log" @@ -65,7 +67,7 @@ class RGWSyncErrorLogger { vector<string> oids; int num_shards; - atomic_t counter; + std::atomic<int64_t> counter = { 0 }; public: RGWSyncErrorLogger(RGWRados *_store, const string &oid_prefix, int _num_shards); RGWCoroutine *log_error_cr(const string& source_zone, const string& section, const string& name, uint32_t error_code, const string& message); @@ -191,12 +193,11 @@ class RGWRemoteMetaLog : public RGWCoroutinesManager { RGWSyncBackoff backoff; RGWMetaSyncEnv sync_env; - rgw_meta_sync_status sync_status; void init_sync_env(RGWMetaSyncEnv *env); - int store_sync_info(); + int store_sync_info(const rgw_meta_sync_info& sync_info); - atomic_t going_down; + std::atomic<bool> going_down = { false }; public: RGWRemoteMetaLog(RGWRados *_store, RGWAsyncRadosProcessor *async_rados, @@ -214,7 +215,7 @@ public: int read_log_info(rgw_mdlog_info *log_info); int read_master_log_shards_info(const string& master_period, map<int, 
RGWMetadataLogInfo> *shards_info); int read_master_log_shards_next(const string& period, map<int, string> shard_markers, map<int, rgw_mdlog_shard_data> *result); - int read_sync_status(); + int read_sync_status(rgw_meta_sync_status *sync_status); int init_sync_status(); int run_sync(); @@ -223,7 +224,6 @@ public: RGWMetaSyncEnv& get_sync_env() { return sync_env; } - const rgw_meta_sync_status& get_sync_status() const { return sync_status; } }; class RGWMetaSyncStatusManager { @@ -257,13 +257,10 @@ public: : store(_store), master_log(store, async_rados, this), ts_to_shard_lock("ts_to_shard_lock") {} int init(); - void finish(); - const rgw_meta_sync_status& get_sync_status() const { - return master_log.get_sync_status(); + int read_sync_status(rgw_meta_sync_status *sync_status) { + return master_log.read_sync_status(sync_status); } - - int read_sync_status() { return master_log.read_sync_status(); } int init_sync_status() { return master_log.init_sync_status(); } int read_log_info(rgw_mdlog_info *log_info) { return master_log.read_log_info(log_info); @@ -455,5 +452,13 @@ public: int operate() override; }; +// MetaLogTrimCR factory function +RGWCoroutine* create_meta_log_trim_cr(RGWRados *store, RGWHTTPManager *http, + int num_shards, utime_t interval); + +// factory function for mdlog trim via radosgw-admin +RGWCoroutine* create_admin_meta_log_trim_cr(RGWRados *store, + RGWHTTPManager *http, + int num_shards); #endif diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc index 79bffb2018f..b79fecb6a85 100644 --- a/src/rgw/rgw_tools.cc +++ b/src/rgw/rgw_tools.cc @@ -42,7 +42,6 @@ int rgw_get_system_obj(RGWRados *rgwstore, RGWObjectCtx& obj_ctx, const rgw_pool RGWObjVersionTracker *objv_tracker, real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info) { - struct rgw_err err; bufferlist::iterator iter; int request_len = READ_CHUNK_LEN; rgw_raw_obj obj(pool, key); @@ -58,7 +57,6 @@ int rgw_get_system_obj(RGWRados *rgwstore, RGWObjectCtx& 
obj_ctx, const rgw_pool rop.stat_params.attrs = pattrs; rop.stat_params.lastmod = pmtime; - rop.stat_params.perr = &err; int ret = rop.stat(objv_tracker); if (ret < 0) diff --git a/src/rgw/rgw_torrent.h b/src/rgw/rgw_torrent.h index 6d2b28cd91a..ab8e89bbd79 100644 --- a/src/rgw/rgw_torrent.h +++ b/src/rgw/rgw_torrent.h @@ -11,7 +11,6 @@ #include "rgw_rados.h" #include "rgw_common.h" -using namespace std; using ceph::crypto::SHA1; struct req_state; diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h index 50cc1b62909..7a27d0e7007 100644 --- a/src/rgw/rgw_user.h +++ b/src/rgw/rgw_user.h @@ -19,8 +19,6 @@ #include "common/Formatter.h" #include "rgw_formats.h" -using namespace std; - #define RGW_USER_ANON_ID "anonymous" #define SECRET_KEY_LEN 40 diff --git a/src/rgw/rgw_xml.h b/src/rgw/rgw_xml.h index 2517c7b78e8..1f37a6a49e2 100644 --- a/src/rgw/rgw_xml.h +++ b/src/rgw/rgw_xml.h @@ -10,9 +10,6 @@ #include <include/types.h> #include <common/Formatter.h> -using namespace std; - - class XMLObj; class XMLObjIter { |