summaryrefslogtreecommitdiffstats
path: root/src/rgw
diff options
context:
space:
mode:
Diffstat (limited to 'src/rgw')
-rw-r--r--src/rgw/CMakeLists.txt38
-rw-r--r--src/rgw/rgw_acl.h2
-rw-r--r--src/rgw/rgw_acl_s3.h3
-rw-r--r--src/rgw/rgw_acl_swift.cc40
-rw-r--r--src/rgw/rgw_acl_swift.h4
-rw-r--r--src/rgw/rgw_admin.cc316
-rw-r--r--src/rgw/rgw_asio_client.cc78
-rw-r--r--src/rgw/rgw_asio_client.h113
-rw-r--r--src/rgw/rgw_asio_frontend.cc115
-rw-r--r--src/rgw/rgw_auth.cc57
-rw-r--r--src/rgw/rgw_auth.h9
-rw-r--r--src/rgw/rgw_auth_filters.h5
-rw-r--r--src/rgw/rgw_basic_types.cc28
-rw-r--r--src/rgw/rgw_basic_types.h82
-rw-r--r--src/rgw/rgw_bucket.cc21
-rw-r--r--src/rgw/rgw_bucket.h18
-rw-r--r--src/rgw/rgw_cache.h12
-rw-r--r--src/rgw/rgw_civetweb_frontend.cc18
-rw-r--r--src/rgw/rgw_common.cc325
-rw-r--r--src/rgw/rgw_common.h83
-rw-r--r--src/rgw/rgw_coroutine.cc17
-rw-r--r--src/rgw/rgw_coroutine.h11
-rw-r--r--src/rgw/rgw_cors_s3.h3
-rw-r--r--src/rgw/rgw_cors_swift.h2
-rw-r--r--src/rgw/rgw_cr_rados.cc72
-rw-r--r--src/rgw/rgw_cr_rados.h61
-rw-r--r--src/rgw/rgw_crypt.cc14
-rw-r--r--src/rgw/rgw_crypt.h19
-rw-r--r--src/rgw/rgw_data_sync.cc175
-rw-r--r--src/rgw/rgw_file.cc7
-rw-r--r--src/rgw/rgw_file.h20
-rw-r--r--src/rgw/rgw_gc.cc4
-rw-r--r--src/rgw/rgw_gc.h5
-rw-r--r--src/rgw/rgw_http_client.cc16
-rw-r--r--src/rgw/rgw_http_client.h9
-rw-r--r--src/rgw/rgw_http_errors.h139
-rw-r--r--src/rgw/rgw_iam_policy.cc1514
-rw-r--r--src/rgw/rgw_iam_policy.h466
-rw-r--r--src/rgw/rgw_iam_policy_keywords.gperf127
-rw-r--r--src/rgw/rgw_iam_policy_keywords.h139
-rw-r--r--src/rgw/rgw_json_enc.cc2
-rw-r--r--src/rgw/rgw_keystone.cc2
-rw-r--r--src/rgw/rgw_keystone.h6
-rw-r--r--src/rgw/rgw_lc.cc30
-rw-r--r--src/rgw/rgw_lc.h9
-rw-r--r--src/rgw/rgw_lc_s3.h6
-rw-r--r--src/rgw/rgw_ldap.cc1
-rw-r--r--src/rgw/rgw_loadgen_process.cc12
-rw-r--r--src/rgw/rgw_log.cc2
-rw-r--r--src/rgw/rgw_log.h1
-rw-r--r--src/rgw/rgw_main.cc19
-rw-r--r--src/rgw/rgw_meta_sync_status.h9
-rw-r--r--src/rgw/rgw_metadata.cc178
-rw-r--r--src/rgw/rgw_metadata.h32
-rw-r--r--src/rgw/rgw_object_expirer_core.cc4
-rw-r--r--src/rgw/rgw_object_expirer_core.h5
-rw-r--r--src/rgw/rgw_op.cc909
-rw-r--r--src/rgw/rgw_op.h109
-rw-r--r--src/rgw/rgw_period_history.cc9
-rw-r--r--src/rgw/rgw_period_history.h3
-rw-r--r--src/rgw/rgw_period_pusher.cc5
-rw-r--r--src/rgw/rgw_process.h4
-rw-r--r--src/rgw/rgw_quota.cc8
-rw-r--r--src/rgw/rgw_quota.h4
-rw-r--r--src/rgw/rgw_rados.cc185
-rw-r--r--src/rgw/rgw_rados.h47
-rw-r--r--src/rgw/rgw_realm_reloader.cc8
-rw-r--r--src/rgw/rgw_realm_reloader.h13
-rw-r--r--src/rgw/rgw_replica_log.h2
-rw-r--r--src/rgw/rgw_request.h7
-rw-r--r--src/rgw/rgw_rest.cc469
-rw-r--r--src/rgw/rgw_rest.h64
-rw-r--r--src/rgw/rgw_rest_conn.cc33
-rw-r--r--src/rgw/rgw_rest_conn.h7
-rw-r--r--src/rgw/rgw_rest_log.cc4
-rw-r--r--src/rgw/rgw_rest_realm.cc4
-rw-r--r--src/rgw/rgw_rest_role.cc24
-rw-r--r--src/rgw/rgw_rest_role.h2
-rw-r--r--src/rgw/rgw_rest_s3.cc423
-rw-r--r--src/rgw/rgw_rest_s3.h50
-rw-r--r--src/rgw/rgw_rest_swift.cc321
-rw-r--r--src/rgw/rgw_rest_swift.h91
-rw-r--r--src/rgw/rgw_role.cc81
-rw-r--r--src/rgw/rgw_role.h44
-rw-r--r--src/rgw/rgw_sync.cc924
-rw-r--r--src/rgw/rgw_sync.h27
-rw-r--r--src/rgw/rgw_tools.cc2
-rw-r--r--src/rgw/rgw_torrent.h1
-rw-r--r--src/rgw/rgw_user.h2
-rw-r--r--src/rgw/rgw_xml.h3
90 files changed, 6648 insertions, 1746 deletions
diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
index 583b3531cdb..591d136ccae 100644
--- a/src/rgw/CMakeLists.txt
+++ b/src/rgw/CMakeLists.txt
@@ -25,6 +25,19 @@ add_custom_target(civetweb_h
"${CMAKE_BINARY_DIR}/src/include/civetweb"
COMMENT "keep civetweb.h up-to-date")
+find_program(GPERF gperf)
+if(NOT GPERF)
+ message(FATAL_ERROR "Can't find gperf")
+endif()
+function(gperf_generate input output)
+ add_custom_command(
+ OUTPUT ${output}
+ COMMAND ${GPERF} ${input} > ${output}
+ DEPENDS ${input}
+ COMMENT "Generate ${output}"
+ )
+endfunction()
+
set(rgw_a_srcs
rgw_acl.cc
rgw_acl_s3.cc
@@ -103,7 +116,15 @@ set(rgw_a_srcs
rgw_xml_enc.cc
rgw_torrent.cc
rgw_crypt.cc
- rgw_crypt_sanitize.cc)
+ rgw_crypt_sanitize.cc
+ rgw_iam_policy.cc)
+
+gperf_generate(${CMAKE_SOURCE_DIR}/src/rgw/rgw_iam_policy_keywords.gperf
+ rgw_iam_policy_keywords.frag.cc)
+set_source_files_properties(rgw_iam_policy.cc PROPERTIES
+ OBJECT_DEPENDS ${CMAKE_BINARY_DIR}/src/rgw/rgw_iam_policy_keywords.frag.cc
+ COMPILE_FLAGS -I${CMAKE_BINARY_DIR}/src/rgw)
+
if (WITH_RADOSGW_FCGI_FRONTEND)
list(APPEND rgw_a_srcs rgw_fcgi.cc)
@@ -115,12 +136,15 @@ add_dependencies(rgw_a civetweb_h)
target_include_directories(rgw_a PUBLIC
"../Beast/include"
- ${FCGI_INCLUDE_DIR})
+ ${FCGI_INCLUDE_DIR}
+ "../rapidjson/include"
+ )
+target_compile_definitions(rgw_a PUBLIC BOOST_COROUTINES_NO_DEPRECATION_WARNING)
target_link_libraries(rgw_a librados cls_lock_client cls_rgw_client cls_refcount_client
cls_log_client cls_statelog_client cls_timeindex_client cls_version_client
cls_replica_log_client cls_user_client ceph-common common_utf8 global
- ${CURL_LIBRARIES}
+ ${CURL_LIBRARIES} ${Boost_LIBRARIES}
${EXPAT_LIBRARIES}
${OPENLDAP_LIBRARIES} ${CRYPTO_LIBS})
@@ -134,15 +158,15 @@ if (WITH_RADOSGW_FCGI_FRONTEND)
list(APPEND radosgw_srcs rgw_fcgi_process.cc)
endif()
-if (WITH_RADOSGW_ASIO_FRONTEND)
+if (WITH_RADOSGW_BEAST_FRONTEND)
list(APPEND radosgw_srcs
rgw_asio_client.cc
rgw_asio_frontend.cc)
-endif (WITH_RADOSGW_ASIO_FRONTEND)
+endif (WITH_RADOSGW_BEAST_FRONTEND)
add_library(radosgw_a STATIC ${radosgw_srcs}
$<TARGET_OBJECTS:civetweb_common_objs>)
-target_link_libraries(radosgw_a rgw_a)
+target_link_libraries(radosgw_a rgw_a ${SSL_LIBRARIES})
add_executable(radosgw rgw_main.cc)
target_link_libraries(radosgw radosgw_a librados
@@ -150,7 +174,7 @@ target_link_libraries(radosgw radosgw_a librados
cls_log_client cls_statelog_client cls_timeindex_client
cls_version_client cls_replica_log_client cls_user_client
global ${FCGI_LIBRARY} ${LIB_RESOLV}
- ${CURL_LIBRARIES} ${EXPAT_LIBRARIES} ${SSL_LIBRARIES} ${BLKID_LIBRARIES}
+ ${CURL_LIBRARIES} ${EXPAT_LIBRARIES} ${BLKID_LIBRARIES}
${ALLOC_LIBS})
# radosgw depends on cls libraries at runtime, but not as link dependencies
add_dependencies(radosgw cls_rgw cls_lock cls_refcount
diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h
index 24567677e8e..26c84d121a2 100644
--- a/src/rgw/rgw_acl.h
+++ b/src/rgw/rgw_acl.h
@@ -15,8 +15,6 @@
#include "rgw_basic_types.h"
-using namespace std;
-
#define RGW_PERM_NONE 0x00
#define RGW_PERM_READ 0x01
#define RGW_PERM_WRITE 0x02
diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h
index 3a52735e091..6991a0b8363 100644
--- a/src/rgw/rgw_acl_s3.h
+++ b/src/rgw/rgw_acl_s3.h
@@ -13,9 +13,6 @@
#include "rgw_xml.h"
#include "rgw_acl.h"
-
-using namespace std;
-
class RGWRados;
class ACLPermission_S3 : public ACLPermission, public XMLObj
diff --git a/src/rgw/rgw_acl_swift.cc b/src/rgw/rgw_acl_swift.cc
index 78b60124ba6..339b93bbe06 100644
--- a/src/rgw/rgw_acl_swift.cc
+++ b/src/rgw/rgw_acl_swift.cc
@@ -179,11 +179,13 @@ int RGWAccessControlPolicy_SWIFT::create(RGWRados* const store,
const rgw_user& id,
const std::string& name,
const std::string& read_list,
- const std::string& write_list)
+ const std::string& write_list,
+ uint32_t& rw_mask)
{
acl.create_default(id, name);
owner.set_id(id);
owner.set_name(name);
+ rw_mask = 0;
if (read_list.size()) {
std::vector<std::string> uids;
@@ -200,6 +202,7 @@ int RGWAccessControlPolicy_SWIFT::create(RGWRados* const store,
<< r << dendl;
return r;
}
+ rw_mask |= SWIFT_PERM_READ;
}
if (write_list.size()) {
std::vector<std::string> uids;
@@ -216,10 +219,45 @@ int RGWAccessControlPolicy_SWIFT::create(RGWRados* const store,
<< r << dendl;
return r;
}
+ rw_mask |= SWIFT_PERM_WRITE;
}
return 0;
}
+void RGWAccessControlPolicy_SWIFT::filter_merge(uint32_t rw_mask,
+ RGWAccessControlPolicy_SWIFT *old)
+{
+ /* rw_mask&SWIFT_PERM_READ => setting read acl,
+ * rw_mask&SWIFT_PERM_WRITE => setting write acl
+ * when bit is cleared, copy matching elements from old.
+ */
+ if (rw_mask == (SWIFT_PERM_READ|SWIFT_PERM_WRITE)) {
+ return;
+ }
+ rw_mask ^= (SWIFT_PERM_READ|SWIFT_PERM_WRITE);
+ for (auto &iter: old->acl.get_grant_map()) {
+ ACLGrant& grant = iter.second;
+ uint32_t perm = grant.get_permission().get_permissions();
+ rgw_user id;
+ string url_spec;
+ if (!grant.get_id(id)) {
+ if (grant.get_group() != ACL_GROUP_ALL_USERS) {
+ url_spec = grant.get_referer();
+ if (url_spec.empty()) {
+ continue;
+ }
+ if (perm == 0) {
+ /* We need to carry also negative, HTTP referrer-based ACLs. */
+ perm = SWIFT_PERM_READ;
+ }
+ }
+ }
+ if (perm & rw_mask) {
+ acl.add_grant(&grant);
+ }
+ }
+}
+
void RGWAccessControlPolicy_SWIFT::to_str(string& read, string& write)
{
multimap<string, ACLGrant>& m = acl.get_grant_map();
diff --git a/src/rgw/rgw_acl_swift.h b/src/rgw/rgw_acl_swift.h
index b74a85ecfe1..883b623af18 100644
--- a/src/rgw/rgw_acl_swift.h
+++ b/src/rgw/rgw_acl_swift.h
@@ -27,7 +27,9 @@ public:
const rgw_user& id,
const std::string& name,
const std::string& read_list,
- const std::string& write_list);
+ const std::string& write_list,
+ uint32_t& rw_mask);
+ void filter_merge(uint32_t mask, RGWAccessControlPolicy_SWIFT *policy);
void to_str(std::string& read, std::string& write);
};
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 46a9d3f933c..69ff573fcde 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -421,6 +421,7 @@ enum {
OPT_METADATA_SYNC_INIT,
OPT_METADATA_SYNC_RUN,
OPT_MDLOG_LIST,
+ OPT_MDLOG_AUTOTRIM,
OPT_MDLOG_TRIM,
OPT_MDLOG_FETCH,
OPT_MDLOG_STATUS,
@@ -819,6 +820,8 @@ static int get_cmd(const char *cmd, const char *prev_cmd, const char *prev_prev_
} else if (strcmp(prev_cmd, "mdlog") == 0) {
if (strcmp(cmd, "list") == 0)
return OPT_MDLOG_LIST;
+ if (strcmp(cmd, "autotrim") == 0)
+ return OPT_MDLOG_AUTOTRIM;
if (strcmp(cmd, "trim") == 0)
return OPT_MDLOG_TRIM;
if (strcmp(cmd, "fetch") == 0)
@@ -1511,32 +1514,46 @@ int do_check_object_locator(const string& tenant_name, const string& bucket_name
return 0;
}
-#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
-static int send_to_remote_gateway(const string& remote, req_info& info,
- bufferlist& in_data, JSONParser& parser)
+/// search for a matching zone/zonegroup id and return a connection if found
+static boost::optional<RGWRESTConn> get_remote_conn(RGWRados *store,
+ const RGWZoneGroup& zonegroup,
+ const std::string& remote)
{
- bufferlist response;
- RGWRESTConn *conn;
- if (remote.empty()) {
- if (!store->rest_master_conn) {
- cerr << "Invalid rest master connection" << std::endl;
- return -EINVAL;
- }
- conn = store->rest_master_conn;
+ boost::optional<RGWRESTConn> conn;
+ if (remote == zonegroup.get_id()) {
+ conn.emplace(store->ctx(), store, remote, zonegroup.endpoints);
} else {
- // check zonegroups
- auto iter = store->zonegroup_conn_map.find(remote);
- if (iter == store->zonegroup_conn_map.end()) {
- // check zones
- iter = store->zone_conn_map.find(remote);
- if (iter == store->zone_conn_map.end()) {
- cerr << "could not find connection for zone or zonegroup id: "
- << remote << std::endl;
- return -ENOENT;
+ for (const auto& z : zonegroup.zones) {
+ const auto& zone = z.second;
+ if (remote == zone.id) {
+ conn.emplace(store->ctx(), store, remote, zone.endpoints);
+ break;
}
}
- conn = iter->second;
}
+ return conn;
+}
+
+/// search each zonegroup for a connection
+static boost::optional<RGWRESTConn> get_remote_conn(RGWRados *store,
+ const RGWPeriodMap& period_map,
+ const std::string& remote)
+{
+ boost::optional<RGWRESTConn> conn;
+ for (const auto& zg : period_map.zonegroups) {
+ conn = get_remote_conn(store, zg.second, remote);
+ if (conn) {
+ break;
+ }
+ }
+ return conn;
+}
+
+#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
+static int send_to_remote_gateway(RGWRESTConn* conn, req_info& info,
+ bufferlist& in_data, JSONParser& parser)
+{
+ bufferlist response;
rgw_user user;
int ret = conn->forward(user, info, NULL, MAX_REST_RESPONSE, &in_data, &response);
@@ -1574,20 +1591,21 @@ static int send_to_url(const string& url, const string& access,
return ret;
}
-static int send_to_remote_or_url(const string& remote, const string& url,
+static int send_to_remote_or_url(RGWRESTConn *conn, const string& url,
const string& access, const string& secret,
req_info& info, bufferlist& in_data,
JSONParser& parser)
{
if (url.empty()) {
- return send_to_remote_gateway(remote, info, in_data, parser);
+ return send_to_remote_gateway(conn, info, in_data, parser);
}
return send_to_url(url, access, secret, info, in_data, parser);
}
static int commit_period(RGWRealm& realm, RGWPeriod& period,
string remote, const string& url,
- const string& access, const string& secret)
+ const string& access, const string& secret,
+ bool force)
{
const string& master_zone = period.get_master_zone();
if (master_zone.empty()) {
@@ -1605,7 +1623,7 @@ static int commit_period(RGWRealm& realm, RGWPeriod& period,
return ret;
}
// the master zone can commit locally
- ret = period.commit(realm, current_period, cerr);
+ ret = period.commit(realm, current_period, cerr, force);
if (ret < 0) {
cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl;
}
@@ -1617,6 +1635,17 @@ static int commit_period(RGWRealm& realm, RGWPeriod& period,
remote = master_zone;
cout << "Sending period to new master zone " << remote << std::endl;
}
+ boost::optional<RGWRESTConn> conn;
+ RGWRESTConn *remote_conn = nullptr;
+ if (!remote.empty()) {
+ conn = get_remote_conn(store, period.get_map(), remote);
+ if (!conn) {
+ cerr << "failed to find a zone or zonegroup for remote "
+ << remote << std::endl;
+ return -ENOENT;
+ }
+ remote_conn = &*conn;
+ }
// push period to the master with an empty period id
period.set_id("");
@@ -1633,7 +1662,7 @@ static int commit_period(RGWRealm& realm, RGWPeriod& period,
jf.flush(bl);
JSONParser p;
- int ret = send_to_remote_or_url(remote, url, access, secret, info, bl, p);
+ int ret = send_to_remote_or_url(remote_conn, url, access, secret, info, bl, p);
if (ret < 0) {
cerr << "request failed: " << cpp_strerror(-ret) << std::endl;
@@ -1682,7 +1711,7 @@ static int update_period(const string& realm_id, const string& realm_name,
const string& period_id, const string& period_epoch,
bool commit, const string& remote, const string& url,
const string& access, const string& secret,
- Formatter *formatter)
+ Formatter *formatter, bool force)
{
RGWRealm realm(realm_id, realm_name);
int ret = realm.init(g_ceph_context, store);
@@ -1713,7 +1742,7 @@ static int update_period(const string& realm_id, const string& realm_name,
return ret;
}
if (commit) {
- ret = commit_period(realm, period, remote, url, access, secret);
+ ret = commit_period(realm, period, remote, url, access, secret, force);
if (ret < 0) {
cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl;
return ret;
@@ -1739,8 +1768,10 @@ static int init_bucket_for_sync(const string& tenant, const string& bucket_name,
return 0;
}
-static int do_period_pull(const string& remote, const string& url, const string& access_key, const string& secret_key,
- const string& realm_id, const string& realm_name, const string& period_id, const string& period_epoch,
+static int do_period_pull(RGWRESTConn *remote_conn, const string& url,
+ const string& access_key, const string& secret_key,
+ const string& realm_id, const string& realm_name,
+ const string& period_id, const string& period_epoch,
RGWPeriod *period)
{
RGWEnv env;
@@ -1760,7 +1791,7 @@ static int do_period_pull(const string& remote, const string& url, const string&
bufferlist bl;
JSONParser p;
- int ret = send_to_remote_or_url(remote, url, access_key, secret_key,
+ int ret = send_to_remote_or_url(remote_conn, url, access_key, secret_key,
info, bl, p);
if (ret < 0) {
cerr << "request failed: " << cpp_strerror(-ret) << std::endl;
@@ -1826,14 +1857,13 @@ static void get_md_sync_status(list<string>& status)
return;
}
- ret = sync.read_sync_status();
+ rgw_meta_sync_status sync_status;
+ ret = sync.read_sync_status(&sync_status);
if (ret < 0) {
status.push_back(string("failed to read sync status: ") + cpp_strerror(-ret));
return;
}
- const rgw_meta_sync_status& sync_status = sync.get_sync_status();
-
string status_str;
switch (sync_status.sync_info.state) {
case rgw_meta_sync_info::StateInit:
@@ -1912,7 +1942,8 @@ static void get_md_sync_status(list<string>& status)
continue;
}
auto master_marker = iter->second.marker;
- if (master_marker > local_iter.second.marker) {
+ if (local_iter.second.state == rgw_meta_sync_marker::SyncState::IncrementalSync &&
+ master_marker > local_iter.second.marker) {
shards_behind[shard_id] = local_iter.second.marker;
}
}
@@ -2059,7 +2090,8 @@ static void get_data_sync_status(const string& source_zone, list<string>& status
continue;
}
auto master_marker = iter->second.marker;
- if (master_marker > local_iter.second.marker) {
+ if (local_iter.second.state == rgw_data_sync_marker::SyncState::IncrementalSync &&
+ master_marker > local_iter.second.marker) {
shards_behind[shard_id] = local_iter.second.marker;
}
}
@@ -2823,15 +2855,6 @@ int main(int argc, const char **argv)
++i;
}
}
- if (tenant.empty()) {
- tenant = user_id.tenant;
- } else {
- if (user_id.empty()) {
- cerr << "ERROR: --tenant is set, but there's no user ID" << std::endl;
- return EINVAL;
- }
- user_id.tenant = tenant;
- }
if (args.empty()) {
return usage();
@@ -2871,6 +2894,23 @@ int main(int argc, const char **argv)
}
}
+ if (tenant.empty()) {
+ tenant = user_id.tenant;
+ } else {
+ if (user_id.empty() && opt_cmd != OPT_ROLE_CREATE
+ && opt_cmd != OPT_ROLE_DELETE
+ && opt_cmd != OPT_ROLE_GET
+ && opt_cmd != OPT_ROLE_MODIFY
+ && opt_cmd != OPT_ROLE_LIST
+ && opt_cmd != OPT_ROLE_POLICY_PUT
+ && opt_cmd != OPT_ROLE_POLICY_LIST
+ && opt_cmd != OPT_ROLE_POLICY_GET
+ && opt_cmd != OPT_ROLE_POLICY_DELETE) {
+ cerr << "ERROR: --tenant is set, but there's no user ID" << std::endl;
+ return EINVAL;
+ }
+ user_id.tenant = tenant;
+ }
/* check key parameter conflict */
if ((!access_key.empty()) && gen_access_key) {
cerr << "ERROR: key parameter conflict, --access-key & --gen-access-key" << std::endl;
@@ -2905,9 +2945,6 @@ int main(int argc, const char **argv)
// not a raw op if 'period update' needs to commit to master
bool raw_period_update = opt_cmd == OPT_PERIOD_UPDATE && !commit;
- // not a raw op if 'period pull' needs to look up remotes
- bool raw_period_pull = opt_cmd == OPT_PERIOD_PULL && remote.empty() && !url.empty();
-
std::set<int> raw_storage_ops_list = {OPT_ZONEGROUP_ADD, OPT_ZONEGROUP_CREATE, OPT_ZONEGROUP_DELETE,
OPT_ZONEGROUP_GET, OPT_ZONEGROUP_LIST,
OPT_ZONEGROUP_SET, OPT_ZONEGROUP_DEFAULT,
@@ -2923,6 +2960,7 @@ int main(int argc, const char **argv)
OPT_ZONE_PLACEMENT_MODIFY, OPT_ZONE_PLACEMENT_LIST,
OPT_REALM_CREATE,
OPT_PERIOD_DELETE, OPT_PERIOD_GET,
+ OPT_PERIOD_PULL,
OPT_PERIOD_GET_CURRENT, OPT_PERIOD_LIST,
OPT_GLOBAL_QUOTA_GET, OPT_GLOBAL_QUOTA_SET,
OPT_GLOBAL_QUOTA_ENABLE, OPT_GLOBAL_QUOTA_DISABLE,
@@ -2934,7 +2972,7 @@ int main(int argc, const char **argv)
bool raw_storage_op = (raw_storage_ops_list.find(opt_cmd) != raw_storage_ops_list.end() ||
- raw_period_update || raw_period_pull);
+ raw_period_update);
if (raw_storage_op) {
store = RGWStoreManager::get_raw_storage(g_ceph_context);
@@ -3040,20 +3078,45 @@ int main(int argc, const char **argv)
{
int ret = update_period(realm_id, realm_name, period_id, period_epoch,
commit, remote, url, access_key, secret_key,
- formatter);
+ formatter, yes_i_really_mean_it);
if (ret < 0) {
return -ret;
}
}
break;
- case OPT_PERIOD_PULL: // period pull --url
+ case OPT_PERIOD_PULL:
{
+ boost::optional<RGWRESTConn> conn;
+ RGWRESTConn *remote_conn = nullptr;
if (url.empty()) {
- cerr << "A --url or --remote must be provided." << std::endl;
- return EINVAL;
+ // load current period for endpoints
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store);
+ if (ret < 0) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ RGWPeriod current_period(realm.get_current_period());
+ ret = current_period.init(g_ceph_context, store);
+ if (ret < 0) {
+ cerr << "failed to init current period: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ if (remote.empty()) {
+ // use realm master zone as remote
+ remote = current_period.get_master_zone();
+ }
+ conn = get_remote_conn(store, current_period.get_map(), remote);
+ if (!conn) {
+ cerr << "failed to find a zone or zonegroup for remote "
+ << remote << std::endl;
+ return -ENOENT;
+ }
+ remote_conn = &*conn;
}
+
RGWPeriod period;
- int ret = do_period_pull(remote, url, access_key, secret_key,
+ int ret = do_period_pull(remote_conn, url, access_key, secret_key,
realm_id, realm_name, period_id, period_epoch,
&period);
if (ret < 0) {
@@ -3399,7 +3462,7 @@ int main(int argc, const char **argv)
auto& current_period = realm.get_current_period();
if (!current_period.empty()) {
// pull the latest epoch of the realm's current period
- ret = do_period_pull(remote, url, access_key, secret_key,
+ ret = do_period_pull(nullptr, url, access_key, secret_key,
realm_id, realm_name, current_period, "",
&period);
if (ret < 0) {
@@ -3642,7 +3705,6 @@ int main(int argc, const char **argv)
}
if (need_update) {
- zonegroup.post_process_params();
ret = zonegroup.update();
if (ret < 0) {
cerr << "failed to update zonegroup: " << cpp_strerror(-ret) << std::endl;
@@ -4533,7 +4595,7 @@ int main(int argc, const char **argv)
jf.flush(bl);
JSONParser p;
- ret = send_to_remote_or_url(remote, url, access_key, secret_key,
+ ret = send_to_remote_or_url(nullptr, url, access_key, secret_key,
info, bl, p);
if (ret < 0) {
cerr << "request failed: " << cpp_strerror(-ret) << std::endl;
@@ -4541,43 +4603,11 @@ int main(int argc, const char **argv)
}
}
return 0;
- case OPT_PERIOD_PULL: // period pull --remote
- {
- if (remote.empty()) {
- /* use realm master zonegroup as remote */
- RGWRealm realm(realm_id, realm_name);
- int ret = realm.init(g_ceph_context, store);
- if (ret < 0) {
- cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
- return -ret;
- }
- RGWPeriod current_period(realm.get_current_period());
- ret = current_period.init(g_ceph_context, store);
- if (ret < 0) {
- cerr << "failed to init current period: " << cpp_strerror(-ret) << std::endl;
- return -ret;
- }
- remote = current_period.get_master_zonegroup();
- }
- RGWPeriod period;
- int ret = do_period_pull(remote, url, access_key, secret_key,
- realm_id, realm_name, period_id, period_epoch,
- &period);
- if (ret < 0) {
- cerr << "period pull failed: " << cpp_strerror(-ret) << std::endl;
- return -ret;
- }
-
- encode_json("period", period, formatter);
- formatter->flush(cout);
- cout << std::endl;
- }
- return 0;
case OPT_PERIOD_UPDATE:
{
int ret = update_period(realm_id, realm_name, period_id, period_epoch,
commit, remote, url, access_key, secret_key,
- formatter);
+ formatter, yes_i_really_mean_it);
if (ret < 0) {
return -ret;
}
@@ -4598,7 +4628,8 @@ int main(int argc, const char **argv)
cerr << "period init failed: " << cpp_strerror(-ret) << std::endl;
return -ret;
}
- ret = commit_period(realm, period, remote, url, access_key, secret_key);
+ ret = commit_period(realm, period, remote, url, access_key, secret_key,
+ yes_i_really_mean_it);
if (ret < 0) {
cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl;
return -ret;
@@ -4611,10 +4642,13 @@ int main(int argc, const char **argv)
return 0;
case OPT_ROLE_CREATE:
{
- string uid;
- user_id.to_str(uid);
- if (role_name.empty() || assume_role_doc.empty() || uid.empty()) {
- cerr << "ERROR: one of role name or assume role policy document or uid is empty" << std::endl;
+ if (role_name.empty()) {
+ cerr << "ERROR: role name is empty" << std::endl;
+ return -EINVAL;
+ }
+
+ if (assume_role_doc.empty()) {
+ cerr << "ERROR: assume role policy document is empty" << std::endl;
return -EINVAL;
}
/* The following two calls will be replaced by read_decode_json or something
@@ -4631,7 +4665,7 @@ int main(int argc, const char **argv)
return -EINVAL;
}
string trust_policy = bl.to_str();
- RGWRole role(g_ceph_context, store, role_name, path, trust_policy, uid);
+ RGWRole role(g_ceph_context, store, role_name, path, trust_policy, tenant);
ret = role.create(true);
if (ret < 0) {
return -ret;
@@ -4645,7 +4679,7 @@ int main(int argc, const char **argv)
cerr << "ERROR: empty role name" << std::endl;
return -EINVAL;
}
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, tenant);
ret = role.delete_obj();
if (ret < 0) {
return -ret;
@@ -4659,7 +4693,7 @@ int main(int argc, const char **argv)
cerr << "ERROR: empty role name" << std::endl;
return -EINVAL;
}
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, tenant);
ret = role.get();
if (ret < 0) {
return -ret;
@@ -4669,10 +4703,16 @@ int main(int argc, const char **argv)
}
case OPT_ROLE_MODIFY:
{
- if (role_name.empty() || assume_role_doc.empty()) {
- cerr << "ERROR: one of role name or assume role policy document is empty" << std::endl;
+ if (role_name.empty()) {
+ cerr << "ERROR: role name is empty" << std::endl;
return -EINVAL;
}
+
+ if (assume_role_doc.empty()) {
+ cerr << "ERROR: assume role policy document is empty" << std::endl;
+ return -EINVAL;
+ }
+
/* The following two calls will be replaced by read_decode_json or something
similar when the code for AWS Policies is in place */
bufferlist bl;
@@ -4687,7 +4727,7 @@ int main(int argc, const char **argv)
return -EINVAL;
}
string trust_policy = bl.to_str();
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, tenant);
ret = role.get();
if (ret < 0) {
return -ret;
@@ -4703,7 +4743,7 @@ int main(int argc, const char **argv)
case OPT_ROLE_LIST:
{
vector<RGWRole> result;
- ret = RGWRole::get_roles_by_path_prefix(store, g_ceph_context, path_prefix, result);
+ ret = RGWRole::get_roles_by_path_prefix(store, g_ceph_context, path_prefix, tenant, result);
if (ret < 0) {
return -ret;
}
@@ -4712,10 +4752,21 @@ int main(int argc, const char **argv)
}
case OPT_ROLE_POLICY_PUT:
{
- if (role_name.empty() || policy_name.empty() || perm_policy_doc.empty()) {
- cerr << "One of role name, policy name or permission policy document is empty" << std::endl;
+ if (role_name.empty()) {
+ cerr << "role name is empty" << std::endl;
+ return -EINVAL;
+ }
+
+ if (policy_name.empty()) {
+ cerr << "policy name is empty" << std::endl;
+ return -EINVAL;
+ }
+
+ if (perm_policy_doc.empty()) {
+ cerr << "permission policy document is empty" << std::endl;
return -EINVAL;
}
+
/* The following two calls will be replaced by read_decode_json or something
similar, when code for AWS Policies is in place.*/
bufferlist bl;
@@ -4732,7 +4783,7 @@ int main(int argc, const char **argv)
string perm_policy;
perm_policy = bl.c_str();
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, tenant);
ret = role.get();
if (ret < 0) {
return -ret;
@@ -4751,7 +4802,7 @@ int main(int argc, const char **argv)
cerr << "ERROR: Role name is empty" << std::endl;
return -EINVAL;
}
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, tenant);
ret = role.get();
if (ret < 0) {
return -ret;
@@ -4762,11 +4813,16 @@ int main(int argc, const char **argv)
}
case OPT_ROLE_POLICY_GET:
{
- if (role_name.empty() || policy_name.empty()) {
- cerr << "ERROR: One of role name or policy name is empty" << std::endl;
+ if (role_name.empty()) {
+ cerr << "ERROR: role name is empty" << std::endl;
return -EINVAL;
}
- RGWRole role(g_ceph_context, store, role_name);
+
+ if (policy_name.empty()) {
+ cerr << "ERROR: policy name is empty" << std::endl;
+ return -EINVAL;
+ }
+ RGWRole role(g_ceph_context, store, role_name, tenant);
int ret = role.get();
if (ret < 0) {
return -ret;
@@ -4781,11 +4837,16 @@ int main(int argc, const char **argv)
}
case OPT_ROLE_POLICY_DELETE:
{
- if (role_name.empty() || policy_name.empty()) {
- cerr << "ERROR: One of role name or policy name is empty" << std::endl;
+ if (role_name.empty()) {
+ cerr << "ERROR: role name is empty" << std::endl;
+ return -EINVAL;
+ }
+
+ if (policy_name.empty()) {
+ cerr << "ERROR: policy name is empty" << std::endl;
return -EINVAL;
}
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, tenant);
ret = role.get();
if (ret < 0) {
return -ret;
@@ -6170,6 +6231,26 @@ next:
formatter->flush(cout);
}
+ if (opt_cmd == OPT_MDLOG_AUTOTRIM) {
+ // need a full history for purging old mdlog periods
+ store->meta_mgr->init_oldest_log_period();
+
+ RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+ RGWHTTPManager http(store->ctx(), crs.get_completion_mgr());
+ int ret = http.set_threaded();
+ if (ret < 0) {
+ cerr << "failed to initialize http client with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+
+ auto num_shards = g_conf->rgw_md_log_max_shards;
+ ret = crs.run(create_admin_meta_log_trim_cr(store, &http, num_shards));
+ if (ret < 0) {
+ cerr << "automated mdlog trim failed with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+ }
+
if (opt_cmd == OPT_MDLOG_TRIM) {
utime_t start_time, end_time;
@@ -6212,14 +6293,13 @@ next:
return -ret;
}
- ret = sync.read_sync_status();
+ rgw_meta_sync_status sync_status;
+ ret = sync.read_sync_status(&sync_status);
if (ret < 0) {
cerr << "ERROR: sync.read_sync_status() returned ret=" << ret << std::endl;
return -ret;
}
- const rgw_meta_sync_status& sync_status = sync.get_sync_status();
-
formatter->open_object_section("summary");
encode_json("sync_status", sync_status, formatter);
@@ -6255,7 +6335,7 @@ next:
}
ret = sync.init_sync_status();
if (ret < 0) {
- cerr << "ERROR: sync.get_sync_status() returned ret=" << ret << std::endl;
+ cerr << "ERROR: sync.init_sync_status() returned ret=" << ret << std::endl;
return -ret;
}
}
@@ -6336,7 +6416,7 @@ next:
ret = sync.init_sync_status();
if (ret < 0) {
- cerr << "ERROR: sync.get_sync_status() returned ret=" << ret << std::endl;
+ cerr << "ERROR: sync.init_sync_status() returned ret=" << ret << std::endl;
return -ret;
}
}
@@ -6384,7 +6464,7 @@ next:
}
ret = sync.init_sync_status();
if (ret < 0) {
- cerr << "ERROR: sync.get_sync_status() returned ret=" << ret << std::endl;
+ cerr << "ERROR: sync.init_sync_status() returned ret=" << ret << std::endl;
return -ret;
}
}
diff --git a/src/rgw/rgw_asio_client.cc b/src/rgw/rgw_asio_client.cc
index 219a36b6837..63de2d27e75 100644
--- a/src/rgw/rgw_asio_client.cc
+++ b/src/rgw/rgw_asio_client.cc
@@ -3,6 +3,7 @@
#include <boost/algorithm/string/predicate.hpp>
#include <boost/asio/write.hpp>
+#include <beast/http/read.hpp>
#include "rgw_asio_client.h"
@@ -12,22 +13,23 @@
#undef dout_prefix
#define dout_prefix (*_dout << "asio: ")
+using namespace rgw::asio;
-RGWAsioClientIO::RGWAsioClientIO(tcp::socket&& socket,
- request_type&& request)
- : socket(std::move(socket)),
- request(std::move(request)),
- txbuf(*this) {
+ClientIO::ClientIO(tcp::socket& socket,
+ parser_type& parser,
+ beast::flat_streambuf& buffer)
+ : socket(socket), parser(parser), buffer(buffer), txbuf(*this)
+{
}
-RGWAsioClientIO::~RGWAsioClientIO() = default;
+ClientIO::~ClientIO() = default;
-void RGWAsioClientIO::init_env(CephContext *cct)
+void ClientIO::init_env(CephContext *cct)
{
env.init(cct);
- body_iter = request.body.begin();
- const auto& headers = request.headers;
+ const auto& request = parser.get();
+ const auto& headers = request.fields;
for (auto header = headers.begin(); header != headers.end(); ++header) {
const auto& name = header->name();
const auto& value = header->value();
@@ -80,42 +82,58 @@ void RGWAsioClientIO::init_env(CephContext *cct)
// TODO: set REMOTE_USER if authenticated
}
-size_t RGWAsioClientIO::write_data(const char* const buf,
- const size_t len)
+size_t ClientIO::write_data(const char* buf, size_t len)
{
boost::system::error_code ec;
auto bytes = boost::asio::write(socket, boost::asio::buffer(buf, len), ec);
if (ec) {
derr << "write_data failed: " << ec.message() << dendl;
throw rgw::io::Exception(ec.value(), std::system_category());
- } else {
- /* According to the documentation of boost::asio::write if there is
- * no error (signalised by ec), then bytes == len. We don't need to
- * take care of partial writes in such situation. */
- return bytes;
}
+ /* According to the documentation of boost::asio::write if there is
+ * no error (signalised by ec), then bytes == len. We don't need to
+ * take care of partial writes in such situation. */
+ return bytes;
}
-size_t RGWAsioClientIO::read_data(char* const buf, const size_t max)
+size_t ClientIO::read_data(char* buf, size_t max)
{
- // read data from the body's bufferlist
- auto bytes = std::min<unsigned>(max, body_iter.get_remaining());
- body_iter.copy(bytes, buf);
- return bytes;
+ auto& message = parser.get();
+ auto& body_remaining = message.body;
+ body_remaining = boost::asio::mutable_buffer{buf, max};
+
+ boost::system::error_code ec;
+
+ dout(30) << this << " read_data for " << max << " with "
+ << buffer.size() << " bytes buffered" << dendl;
+
+ while (boost::asio::buffer_size(body_remaining) && !parser.is_complete()) {
+ auto bytes = beast::http::read_some(socket, buffer, parser, ec);
+ buffer.consume(bytes);
+ if (ec == boost::asio::error::connection_reset ||
+ ec == boost::asio::error::eof ||
+ ec == beast::http::error::partial_message) {
+ break;
+ }
+ if (ec) {
+ derr << "failed to read body: " << ec.message() << dendl;
+ throw rgw::io::Exception(ec.value(), std::system_category());
+ }
+ }
+ return max - boost::asio::buffer_size(body_remaining);
}
-size_t RGWAsioClientIO::complete_request()
+size_t ClientIO::complete_request()
{
return 0;
}
-void RGWAsioClientIO::flush()
+void ClientIO::flush()
{
txbuf.pubsync();
}
-size_t RGWAsioClientIO::send_status(const int status,
- const char* const status_name)
+size_t ClientIO::send_status(int status, const char* status_name)
{
static constexpr size_t STATUS_BUF_SIZE = 128;
@@ -126,7 +144,7 @@ size_t RGWAsioClientIO::send_status(const int status,
return txbuf.sputn(statusbuf, statuslen);
}
-size_t RGWAsioClientIO::send_100_continue()
+size_t ClientIO::send_100_continue()
{
const char HTTTP_100_CONTINUE[] = "HTTP/1.1 100 CONTINUE\r\n\r\n";
const size_t sent = txbuf.sputn(HTTTP_100_CONTINUE,
@@ -148,7 +166,7 @@ static size_t dump_date_header(char (&timestr)[TIME_BUF_SIZE])
"Date: %a, %d %b %Y %H:%M:%S %Z\r\n", tmp);
}
-size_t RGWAsioClientIO::complete_header()
+size_t ClientIO::complete_header()
{
size_t sent = 0;
@@ -172,8 +190,8 @@ size_t RGWAsioClientIO::complete_header()
return sent;
}
-size_t RGWAsioClientIO::send_header(const boost::string_ref& name,
- const boost::string_ref& value)
+size_t ClientIO::send_header(const boost::string_ref& name,
+ const boost::string_ref& value)
{
static constexpr char HEADER_SEP[] = ": ";
static constexpr char HEADER_END[] = "\r\n";
@@ -188,7 +206,7 @@ size_t RGWAsioClientIO::send_header(const boost::string_ref& name,
return sent;
}
-size_t RGWAsioClientIO::send_content_length(const uint64_t len)
+size_t ClientIO::send_content_length(uint64_t len)
{
static constexpr size_t CONLEN_BUF_SIZE = 128;
diff --git a/src/rgw/rgw_asio_client.h b/src/rgw/rgw_asio_client.h
index c87fd5f720b..513a3ef0ca2 100644
--- a/src/rgw/rgw_asio_client.h
+++ b/src/rgw/rgw_asio_client.h
@@ -4,36 +4,58 @@
#define RGW_ASIO_CLIENT_H
#include <boost/asio/ip/tcp.hpp>
-#include <beast/http/body_type.hpp>
-#include <beast/http/concepts.hpp>
-#include <beast/http/message_v1.hpp>
+#include <beast/http/message.hpp>
+#include <beast/http/message_parser.hpp>
+#include <beast/core/flat_streambuf.hpp>
#include "include/assert.h"
#include "rgw_client_io.h"
-// bufferlist to represent the message body
-class RGWBufferlistBody {
- public:
- using value_type = ceph::bufferlist;
+namespace rgw {
+namespace asio {
- class reader;
- class writer;
+/// streaming message body interface
+struct streaming_body {
+ using value_type = boost::asio::mutable_buffer;
- template <bool isRequest, typename Headers>
- using message_type = beast::http::message<isRequest, RGWBufferlistBody,
- Headers>;
-};
+ class reader {
+ value_type& buffer;
+ public:
+ using mutable_buffers_type = boost::asio::mutable_buffers_1;
-class RGWAsioClientIO : public rgw::io::RestfulClient,
- public rgw::io::BuffererSink {
- using tcp = boost::asio::ip::tcp;
- tcp::socket socket;
+ static const bool is_direct{true}; // reads directly into user buffer
- using body_type = RGWBufferlistBody;
- using request_type = beast::http::request_v1<body_type>;
- request_type request;
+ template<bool isRequest, class Fields>
+ explicit reader(beast::http::message<isRequest, streaming_body, Fields>& m)
+ : buffer(m.body)
+ {}
+
+ void init() {}
+ void init(uint64_t content_length) {}
+ void finish() {}
+
+ mutable_buffers_type prepare(size_t n) {
+ n = std::min(n, boost::asio::buffer_size(buffer));
+ auto position = boost::asio::buffer_cast<char*>(buffer);
+ return {position, n};
+ }
- bufferlist::const_iterator body_iter;
+ void commit(size_t n) {
+ buffer = buffer + n;
+ }
+ };
+};
+
+using header_type = beast::http::fields;
+using parser_type = beast::http::message_parser<true, streaming_body, header_type>;
+
+class ClientIO : public io::RestfulClient,
+ public io::BuffererSink {
+ private:
+ using tcp = boost::asio::ip::tcp;
+ tcp::socket& socket;
+ parser_type& parser;
+ beast::flat_streambuf& buffer; //< parse buffer
bool conn_keepalive{false};
bool conn_close{false};
@@ -45,8 +67,11 @@ class RGWAsioClientIO : public rgw::io::RestfulClient,
size_t read_data(char *buf, size_t max);
public:
- RGWAsioClientIO(tcp::socket&& socket, request_type&& request);
- ~RGWAsioClientIO() override;
+ ClientIO(tcp::socket& socket, parser_type& parser,
+ beast::flat_streambuf& buffer);
+ ~ClientIO() override;
+
+ bool get_conn_close() const { return conn_close; }
void init_env(CephContext *cct) override;
size_t complete_request() override;
@@ -71,45 +96,7 @@ class RGWAsioClientIO : public rgw::io::RestfulClient,
}
};
-// used by beast::http::read() to read the body into a bufferlist
-class RGWBufferlistBody::reader {
- value_type& bl;
- public:
- template<bool isRequest, typename Headers>
- explicit reader(message_type<isRequest, Headers>& m) : bl(m.body) {}
-
- void write(const char* data, size_t size, boost::system::error_code&) {
- bl.append(data, size);
- }
-};
-
-// used by beast::http::write() to write the buffered body
-class RGWBufferlistBody::writer {
- const value_type& bl;
- public:
- template<bool isRequest, typename Headers>
- explicit writer(const message_type<isRequest, Headers>& msg)
- : bl(msg.body) {}
-
- void init(boost::system::error_code& ec) {}
- uint64_t content_length() const { return bl.length(); }
-
- template<typename Write>
- boost::tribool operator()(beast::http::resume_context&&,
- boost::system::error_code&, Write&& write) {
- // translate from bufferlist to a ConstBufferSequence for beast
- std::vector<boost::asio::const_buffer> buffers;
- buffers.reserve(bl.get_num_buffers());
- for (auto& ptr : bl.buffers()) {
- buffers.emplace_back(ptr.c_str(), ptr.length());
- }
- write(buffers);
- return true;
- }
-};
-static_assert(beast::http::is_ReadableBody<RGWBufferlistBody>{},
- "RGWBufferlistBody does not satisfy ReadableBody");
-static_assert(beast::http::is_WritableBody<RGWBufferlistBody>{},
- "RGWBufferlistBody does not satisfy WritableBody");
+} // namespace asio
+} // namespace rgw
#endif // RGW_ASIO_CLIENT_H
diff --git a/src/rgw/rgw_asio_frontend.cc b/src/rgw/rgw_asio_frontend.cc
index ff2d7806759..5fc1deec42e 100644
--- a/src/rgw/rgw_asio_frontend.cc
+++ b/src/rgw/rgw_asio_frontend.cc
@@ -7,13 +7,11 @@
#include <vector>
#include <boost/asio.hpp>
-#include <boost/optional.hpp>
+#include <boost/asio/spawn.hpp>
#include <beast/core/placeholders.hpp>
-#include <beast/core/streambuf.hpp>
-#include <beast/http/empty_body.hpp>
-#include <beast/http/parse_error.hpp>
#include <beast/http/read.hpp>
+#include <beast/http/string_body.hpp>
#include <beast/http/write.hpp>
#include "rgw_asio_frontend.h"
@@ -71,28 +69,47 @@ void Pauser::wait()
using tcp = boost::asio::ip::tcp;
-class AsioConnection : public std::enable_shared_from_this<AsioConnection> {
- RGWProcessEnv& env;
- boost::asio::io_service::strand strand;
- tcp::socket socket;
- tcp::endpoint endpoint;
- beast::streambuf buf;
- beast::http::request_v1<RGWBufferlistBody> request;
+// coroutine to handle a client connection to completion
+static void handle_connection(RGWProcessEnv& env, tcp::socket socket,
+ boost::asio::yield_context yield)
+{
+ auto cct = env.store->ctx();
+ boost::system::error_code ec;
- public:
- void on_read(boost::system::error_code ec) {
- auto cct = env.store->ctx();
+ beast::flat_streambuf buffer{1024};
+
+ // read messages from the socket until eof
+ for (;;) {
+ // parse the header
+ rgw::asio::parser_type parser;
+ do {
+ auto bytes = beast::http::async_read_some(socket, buffer, parser, yield[ec]);
+ buffer.consume(bytes);
+ } while (!ec && !parser.got_header());
+
+ if (ec == boost::asio::error::connection_reset ||
+ ec == boost::asio::error::eof) {
+ return;
+ }
if (ec) {
- if (ec.category() == beast::http::get_parse_error_category()) {
- ldout(cct, 1) << "parse failed: " << ec.message() << dendl;
- } else {
- ldout(cct, 1) << "read failed: " << ec.message() << dendl;
- }
- write_bad_request();
+ auto& message = parser.get();
+ ldout(cct, 1) << "read failed: " << ec.message() << dendl;
+ ldout(cct, 1) << "====== req done http_status=400 ======" << dendl;
+ beast::http::response<beast::http::string_body> response;
+ response.status = 400;
+ response.reason = "Bad Request";
+ response.version = message.version == 10 ? 10 : 11;
+ beast::http::prepare(response);
+ beast::http::async_write(socket, std::move(response), yield[ec]);
+ // ignore ec
return;
}
+
+ // process the request
RGWRequest req{env.store->get_new_req_id()};
- RGWAsioClientIO real_client{std::move(socket), std::move(request)};
+
+ rgw::asio::ClientIO real_client{socket, parser, buffer};
+
auto real_client_io = rgw::io::add_reordering(
rgw::io::add_buffering(
rgw::io::add_chunking(
@@ -101,40 +118,12 @@ class AsioConnection : public std::enable_shared_from_this<AsioConnection> {
RGWRestfulIO client(&real_client_io);
process_request(env.store, env.rest, &req, env.uri_prefix,
*env.auth_registry, &client, env.olog);
- }
-
- void write_bad_request() {
- beast::http::response_v1<beast::http::empty_body> response;
- response.status = 400;
- response.reason = "Bad Request";
- /* If the request is so terribly malformed that we can't extract even
- * the protocol version, we will use HTTP/1.1 as a fallback. */
- response.version = request.version ? request.version : 11;
- beast::http::prepare(response);
- beast::http::async_write(socket, std::move(response),
- std::bind(&AsioConnection::on_write,
- shared_from_this(),
- beast::asio::placeholders::error));
- }
- void on_write(boost::system::error_code ec) {
- auto cct = env.store->ctx();
- if (ec) {
- ldout(cct, 1) << "write failed: " << ec.message() << dendl;
+ if (real_client.get_conn_close()) {
+ return;
}
}
-
- public:
- AsioConnection(RGWProcessEnv& env, tcp::socket&& socket)
- : env(env), strand(socket.get_io_service()), socket(std::move(socket))
- {}
-
- void read() {
- beast::http::async_read(socket, buf, request, strand.wrap(
- std::bind(&AsioConnection::on_read, shared_from_this(),
- beast::asio::placeholders::error)));
- }
-};
+}
class AsioFrontend {
RGWProcessEnv env;
@@ -168,9 +157,19 @@ int AsioFrontend::init()
auto ep = tcp::endpoint{tcp::v4(), static_cast<unsigned short>(env.port)};
ldout(ctx(), 4) << "frontend listening on " << ep << dendl;
- acceptor.open(ep.protocol());
+ boost::system::error_code ec;
+ acceptor.open(ep.protocol(), ec);
+ if (ec) {
+ lderr(ctx()) << "failed to open socket: " << ec.message() << dendl;
+ return -ec.value();
+ }
acceptor.set_option(tcp::acceptor::reuse_address(true));
- acceptor.bind(ep);
+ acceptor.bind(ep, ec);
+ if (ec) {
+ lderr(ctx()) << "failed to bind address " << ep <<
+ ": " << ec.message() << dendl;
+ return -ec.value();
+ }
acceptor.listen(boost::asio::socket_base::max_connections);
acceptor.async_accept(peer_socket,
[this] (boost::system::error_code ec) {
@@ -189,13 +188,15 @@ void AsioFrontend::accept(boost::system::error_code ec)
throw ec;
}
auto socket = std::move(peer_socket);
-
+  // Spawn a coroutine to handle the connection. The coroutine owns the
+  // socket through a shared_ptr captured by value rather than referring to
+  // the local `socket`: that local is destroyed when accept() returns, so a
+  // by-reference capture is only safe if the coroutine body has already
+  // been entered by then.
+  // NOTE(review): `env` is reached through `this`; this assumes the
+  // frontend outlives every connection coroutine -- confirm.
+  auto conn_socket = std::make_shared<tcp::socket>(std::move(socket));
+  boost::asio::spawn(service,
+    [this, conn_socket] (boost::asio::yield_context yield) {
+      // handle_connection() takes the socket by value, so hand it over here
+      handle_connection(env, std::move(*conn_socket), yield);
+    });
acceptor.async_accept(peer_socket,
[this] (boost::system::error_code ec) {
return accept(ec);
});
-
- std::make_shared<AsioConnection>(env, std::move(socket))->read();
}
int AsioFrontend::run()
diff --git a/src/rgw/rgw_auth.cc b/src/rgw/rgw_auth.cc
index 6311d4b16ab..c63db0ccec5 100644
--- a/src/rgw/rgw_auth.cc
+++ b/src/rgw/rgw_auth.cc
@@ -56,6 +56,21 @@ transform_old_authinfo(const req_state* const s)
return id == acct_id;
}
+  /* Report whether any principal in `ids` names this identity: a wildcard,
+   * this identity's tenant, or its exact tenant/user pair. */
+  bool is_identity(const idset_t& ids) const override {
+    for (const auto& principal : ids) {
+      if (principal.is_wildcard()) {
+        return true;
+      }
+      if (principal.is_tenant()) {
+        // a tenant principal covers every user of that tenant
+        if (principal.get_tenant() == id.tenant) {
+          return true;
+        }
+        continue;
+      }
+      if (principal.is_user() &&
+          (principal.get_tenant() == id.tenant) &&
+          (principal.get_id() == id.id)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
uint32_t get_perm_mask() const override {
return perm_mask;
}
@@ -291,6 +306,29 @@ bool rgw::auth::RemoteApplier::is_owner_of(const rgw_user& uid) const
return info.acct_user == uid;
}
+/* Report whether any principal in `ids` names the remote account: a
+ * wildcard, the account's tenant, or its exact tenant/user pair. */
+bool rgw::auth::RemoteApplier::is_identity(const idset_t& ids) const {
+  // An empty tenant falls back to the account id. Per the original note,
+  // this covers cases where rgw_keystone_implicit_tenants was enabled.
+  const auto& effective_tenant = info.acct_user.tenant.empty() ?
+                                 info.acct_user.id :
+                                 info.acct_user.tenant;
+
+  for (const auto& principal : ids) {
+    if (principal.is_wildcard()) {
+      return true;
+    }
+    if (principal.is_tenant() &&
+        effective_tenant == principal.get_tenant()) {
+      return true;
+    }
+    if (principal.is_user() &&
+        info.acct_user.id == principal.get_id() &&
+        effective_tenant == principal.get_tenant()) {
+      return true;
+    }
+  }
+  return false;
+}
+
void rgw::auth::RemoteApplier::to_str(std::ostream& out) const
{
out << "rgw::auth::RemoteApplier(acct_user=" << info.acct_user
@@ -383,8 +421,23 @@ bool rgw::auth::LocalApplier::is_owner_of(const rgw_user& uid) const
return uid == user_info.user_id;
}
-void rgw::auth::LocalApplier::to_str(std::ostream& out) const
-{
+/* Report whether any principal in `ids` names the local user: a wildcard,
+ * the user's tenant, or the exact tenant/user pair. */
+bool rgw::auth::LocalApplier::is_identity(const idset_t& ids) const {
+  const auto& self = user_info.user_id;
+
+  for (const auto& principal : ids) {
+    if (principal.is_wildcard()) {
+      return true;
+    }
+    // get_tenant() is only invalid for wildcards, which were handled above
+    const bool same_tenant = (principal.get_tenant() == self.tenant);
+    const bool names_us =
+      principal.is_tenant() ||
+      (principal.is_user() && principal.get_id() == self.id);
+    if (same_tenant && names_us) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void rgw::auth::LocalApplier::to_str(std::ostream& out) const {
out << "rgw::auth::LocalApplier(acct_user=" << user_info.user_id
<< ", acct_name=" << user_info.display_name
<< ", subuser=" << subuser
diff --git a/src/rgw/rgw_auth.h b/src/rgw/rgw_auth.h
index 8118ac150d3..819a3380385 100644
--- a/src/rgw/rgw_auth.h
+++ b/src/rgw/rgw_auth.h
@@ -16,7 +16,6 @@
#define RGW_USER_ANON_ID "anonymous"
-
namespace rgw {
namespace auth {
@@ -28,6 +27,7 @@ using Exception = std::system_error;
class Identity {
public:
typedef std::map<std::string, int> aclspec_t;
+ using idset_t = boost::container::flat_set<Principal>;
virtual ~Identity() = default;
@@ -64,6 +64,10 @@ public:
}
virtual void to_str(std::ostream& out) const = 0;
+
+ /* Verify whether a given identity corresponds to an identity in the
+ provided set */
+ virtual bool is_identity(const idset_t& ids) const = 0;
};
inline std::ostream& operator<<(std::ostream& out,
@@ -404,6 +408,8 @@ public:
uint32_t get_perms_from_aclspec(const aclspec_t& aclspec) const override;
bool is_admin_of(const rgw_user& uid) const override;
bool is_owner_of(const rgw_user& uid) const override;
+ bool is_identity(const idset_t& ids) const override;
+
uint32_t get_perm_mask() const override { return info.perm_mask; }
void to_str(std::ostream& out) const override;
void load_acct_info(RGWUserInfo& user_info) const override; /* out */
@@ -449,6 +455,7 @@ public:
uint32_t get_perms_from_aclspec(const aclspec_t& aclspec) const override;
bool is_admin_of(const rgw_user& uid) const override;
bool is_owner_of(const rgw_user& uid) const override;
+ bool is_identity(const idset_t& ids) const override;
uint32_t get_perm_mask() const override {
return get_perm_mask(subuser, user_info);
}
diff --git a/src/rgw/rgw_auth_filters.h b/src/rgw/rgw_auth_filters.h
index 204a43e73f8..78846fc28d9 100644
--- a/src/rgw/rgw_auth_filters.h
+++ b/src/rgw/rgw_auth_filters.h
@@ -80,6 +80,11 @@ public:
return get_decoratee().get_perm_mask();
}
+ /* Forwards the principal-set check unchanged to the decorated applier. */
+ bool is_identity(
+ const boost::container::flat_set<Principal>& ids) const override {
+ return get_decoratee().is_identity(ids);
+ }
+
void to_str(std::ostream& out) const override {
get_decoratee().to_str(out);
}
diff --git a/src/rgw/rgw_basic_types.cc b/src/rgw/rgw_basic_types.cc
index 5ebf1cfe446..c16d920f21b 100644
--- a/src/rgw/rgw_basic_types.cc
+++ b/src/rgw/rgw_basic_types.cc
@@ -1,6 +1,13 @@
+#include <iostream>
+#include <sstream>
+#include <string>
+
#include "rgw_basic_types.h"
#include "common/ceph_json.h"
+using std::string;
+using std::stringstream;
+
void decode_json_obj(rgw_user& val, JSONObj *obj)
{
string s = obj->get_data();
@@ -12,3 +19,24 @@ void encode_json(const char *name, const rgw_user& val, Formatter *f)
string s = val.to_str();
f->dump_string(name, s);
}
+
+namespace rgw {
+namespace auth {
+/* Stream a Principal: "*" for the wildcard, otherwise
+ * "arn:aws:iam:<tenant>:" followed by "root" for a tenant principal or
+ * "user/<id>" / "role/<id>" for the others. */
+ostream& operator <<(ostream& m, const Principal& p) {
+  if (p.is_wildcard()) {
+    m << "*";
+    return m;
+  }
+
+  m << "arn:aws:iam:" << p.get_tenant() << ":";
+  if (p.is_tenant()) {
+    m << "root";
+  } else {
+    const char* const kind = p.is_user() ? "user/" : "role/";
+    m << kind << p.get_id();
+  }
+  return m;
+}
+
+/* Render a Principal into a string using the stream operator above. */
+string to_string(const Principal& p) {
+  stringstream rendered;
+  rendered << p;
+  return rendered.str();
+}
+}
+}
diff --git a/src/rgw/rgw_basic_types.h b/src/rgw/rgw_basic_types.h
index 261b201369a..31e9d3a32ac 100644
--- a/src/rgw/rgw_basic_types.h
+++ b/src/rgw/rgw_basic_types.h
@@ -1,3 +1,5 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef CEPH_RGW_BASIC_TYPES_H
#define CEPH_RGW_BASIC_TYPES_H
@@ -18,6 +20,10 @@ struct rgw_user {
: tenant(tenant),
id(id) {
}
+ // Rvalue overload: moves both strings into the tenant and id members.
+ rgw_user(std::string&& tenant, std::string&& id)
+ : tenant(std::move(tenant)),
+ id(std::move(id)) {
+ }
void encode(bufferlist& bl) const {
ENCODE_START(1, 1, bl);
@@ -100,6 +106,82 @@ struct rgw_user {
};
WRITE_CLASS_ENCODER(rgw_user)
+// Represents an identity. This is more wide-ranging than a
+// 'User'. Its purpose is to be matched against by an
+// IdentityApplier. The internal representation will doubtless change as
+// more types are added. We may want to expose the type enum and make
+// the member public so people can switch/case on it.
+
+namespace rgw {
+namespace auth {
+class Principal {
+ // Kind of principal; the declaration order also drives operator< below.
+ enum types { User, Role, Tenant, Wildcard };
+ types t;
+ // Tenant/id pair; left default-constructed by the single-argument ctor.
+ rgw_user u;
+
+ // Private: instances are created through the static factories below.
+ Principal(types t)
+ : t(t) {}
+
+ // n is the tenant name and i the id, in rgw_user's (tenant, id) order.
+ Principal(types t, std::string&& n, std::string i)
+ : t(t), u(std::move(n), std::move(i)) {}
+
+public:
+
+ // Principal of kind Wildcard; carries no tenant or id.
+ static Principal wildcard() {
+ return Principal(Wildcard);
+ }
+
+ // User principal for tenant t and user id u.
+ static Principal user(std::string&& t, std::string&& u) {
+ return Principal(User, std::move(t), std::move(u));
+ }
+
+ // Role principal for tenant t and role id u.
+ static Principal role(std::string&& t, std::string&& u) {
+ return Principal(Role, std::move(t), std::move(u));
+ }
+
+ // Tenant principal; the id is left empty.
+ static Principal tenant(std::string&& t) {
+ return Principal(Tenant, std::move(t), {});
+ }
+
+ bool is_wildcard() const {
+ return t == Wildcard;
+ }
+
+ bool is_user() const {
+ return t == User;
+ }
+
+ bool is_role() const {
+ return t == Role;
+ }
+
+ bool is_tenant() const {
+ return t == Tenant;
+ }
+
+ // Asserts that this is not a wildcard principal.
+ const std::string& get_tenant() const {
+ ceph_assert(t != Wildcard);
+ return u.tenant;
+ }
+
+ // Asserts that this is neither a wildcard nor a tenant principal.
+ const std::string& get_id() const {
+ ceph_assert(t != Wildcard && t != Tenant);
+ return u.id;
+ }
+
+ // Equal when both the kind and the tenant/id pair match.
+ bool operator ==(const Principal& o) const {
+ return (t == o.t) && (u == o.u);
+ }
+
+ // Orders by kind first, then by the tenant/id pair.
+ bool operator <(const Principal& o) const {
+ return (t < o.t) || ((t == o.t) && (u < o.u));
+ }
+};
+
+std::ostream& operator <<(std::ostream& m, const Principal& p);
+std::string to_string(const Principal& p);
+}
+}
class JSONObj;
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index 049ab4a294b..7aeee21ad25 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -51,18 +51,9 @@ void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id)
* acceptable in bucket names and thus qualified buckets cannot conflict
* with the legacy or S3 buckets.
*/
-void rgw_make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry) {
- if (bucket_name.empty()) {
- bucket_entry.clear();
- } else if (tenant_name.empty()) {
- bucket_entry = bucket_name;
- } else {
- bucket_entry = tenant_name + "/" + bucket_name;
- }
-}
-
-string rgw_make_bucket_entry_name(const string& tenant_name, const string& bucket_name) {
- string bucket_entry;
+std::string rgw_make_bucket_entry_name(const std::string& tenant_name,
+ const std::string& bucket_name) {
+ std::string bucket_entry;
if (bucket_name.empty()) {
bucket_entry.clear();
@@ -1959,11 +1950,11 @@ int RGWDataChangesLog::trim_entries(const real_time& start_time, const real_time
bool RGWDataChangesLog::going_down()
{
- return (down_flag.read() != 0);
+ return down_flag;
}
RGWDataChangesLog::~RGWDataChangesLog() {
- down_flag.set(1);
+ down_flag = true;
renew_thread->stop();
renew_thread->join();
delete renew_thread;
@@ -2243,7 +2234,7 @@ public:
bci.info.bucket.name = bucket_name;
bci.info.bucket.bucket_id = bucket_instance;
bci.info.bucket.tenant = tenant_name;
- ret = store->select_bucket_location_by_rule(bci.info.placement_rule, bci.info.bucket, &rule_info);
+ ret = store->select_bucket_location_by_rule(bci.info.placement_rule, &rule_info);
if (ret < 0) {
ldout(store->ctx(), 0) << "ERROR: select_bucket_placement() returned " << ret << dendl;
return ret;
diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h
index e691a1178a6..14c472faa8c 100644
--- a/src/rgw/rgw_bucket.h
+++ b/src/rgw/rgw_bucket.h
@@ -20,9 +20,6 @@
#include "common/ceph_time.h"
#include "rgw_formats.h"
-
-using namespace std;
-
// define as static when RGWBucket implementation compete
extern void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id);
@@ -49,11 +46,14 @@ extern int rgw_bucket_delete_bucket_obj(RGWRados *store,
extern int rgw_bucket_sync_user_stats(RGWRados *store, const rgw_user& user_id, const RGWBucketInfo& bucket_info);
extern int rgw_bucket_sync_user_stats(RGWRados *store, const string& tenant_name, const string& bucket_name);
-extern void rgw_make_bucket_entry_name(const string& tenant_name,
- const string& bucket_name,
- string& bucket_entry);
-extern string rgw_make_bucket_entry_name(const string& tenant_name,
- const string& bucket_name);
+extern std::string rgw_make_bucket_entry_name(const std::string& tenant_name,
+ const std::string& bucket_name);
+// Out-parameter form: assigns the result of the two-argument overload
+// declared above to bucket_entry.
+static inline void rgw_make_bucket_entry_name(const string& tenant_name,
+ const string& bucket_name,
+ std::string& bucket_entry) {
+ bucket_entry = rgw_make_bucket_entry_name(tenant_name, bucket_name);
+}
+
extern void rgw_parse_url_bucket(const string& bucket,
const string& auth_tenant,
string &tenant_name, string &bucket_name);
@@ -401,7 +401,7 @@ class RGWDataChangesLog {
RWLock modified_lock;
map<int, set<string> > modified_shards;
- atomic_t down_flag;
+ std::atomic<bool> down_flag = { false };
struct ChangeStatus {
real_time cur_expiration;
diff --git a/src/rgw/rgw_cache.h b/src/rgw/rgw_cache.h
index 32068d39073..6bc9ef14903 100644
--- a/src/rgw/rgw_cache.h
+++ b/src/rgw/rgw_cache.h
@@ -231,7 +231,8 @@ public:
bufferlist& data,
RGWObjVersionTracker *objv_tracker,
real_time set_mtime) override;
- int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, off_t ofs, bool exclusive) override;
+ int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl, off_t ofs, bool exclusive,
+ RGWObjVersionTracker *objv_tracker = nullptr) override;
int get_system_obj(RGWObjectCtx& obj_ctx, RGWRados::SystemObject::Read::GetObjState& read_state,
RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj,
@@ -422,7 +423,8 @@ int RGWCache<T>::put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, real_time
}
template <class T>
-int RGWCache<T>::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& data, off_t ofs, bool exclusive)
+int RGWCache<T>::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& data, off_t ofs, bool exclusive,
+ RGWObjVersionTracker *objv_tracker)
{
rgw_pool pool;
string oid;
@@ -436,7 +438,11 @@ int RGWCache<T>::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& da
info.status = 0;
info.flags = CACHE_FLAG_DATA;
}
- int ret = T::put_system_obj_data(ctx, obj, data, ofs, exclusive);
+ if (objv_tracker) {
+ info.version = objv_tracker->write_version;
+ info.flags |= CACHE_FLAG_OBJV;
+ }
+ int ret = T::put_system_obj_data(ctx, obj, data, ofs, exclusive, objv_tracker);
if (cacheable) {
string name = normal_name(pool, oid);
if (ret >= 0) {
diff --git a/src/rgw/rgw_civetweb_frontend.cc b/src/rgw/rgw_civetweb_frontend.cc
index c564d0e4920..a18a6c5da41 100644
--- a/src/rgw/rgw_civetweb_frontend.cc
+++ b/src/rgw/rgw_civetweb_frontend.cc
@@ -66,32 +66,30 @@ int RGWCivetWebFrontend::run()
/* Prepare options for CivetWeb. */
const std::set<boost::string_ref> rgw_opts = { "port", "prefix" };
- const size_t CW_NUM_OPTS = 2 * (conf_map.size() - rgw_opts.size()) + 1;
- const char *options[CW_NUM_OPTS];
- size_t i = 0;
+
+ std::vector<const char*> options;
for (const auto& pair : conf_map) {
if (! rgw_opts.count(pair.first)) {
/* CivetWeb doesn't understand configurables of the glue layer between
* it and RadosGW. We need to strip them out. Otherwise CivetWeb would
* signalise an error. */
- options[i + 0] = pair.first.c_str();
- options[i + 1] = pair.second.c_str();
+ options.push_back(pair.first.c_str());
+ options.push_back(pair.second.c_str());
- dout(20) << "civetweb config: " << options[i] << ": "
- << (options[i + 1] ? options[i + 1] : "<null>") << dendl;
- i += 2;
+ dout(20) << "civetweb config: " << pair.first
+ << ": " << pair.second << dendl;
}
}
- options[i] = nullptr;
+ options.push_back(nullptr);
/* Initialize the CivetWeb right now. */
struct mg_callbacks cb;
memset((void *)&cb, 0, sizeof(cb));
cb.begin_request = civetweb_callback;
cb.log_message = rgw_civetweb_log_callback;
cb.log_access = rgw_civetweb_log_access_callback;
- ctx = mg_start(&cb, this, (const char **)&options);
+ ctx = mg_start(&cb, this, options.data());
return ! ctx ? -EIO : 0;
} /* RGWCivetWebFrontend::run */
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index fbff872ee21..009918b76ff 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -11,10 +11,12 @@
#include "json_spirit/json_spirit.h"
#include "common/ceph_json.h"
+#include "rgw_op.h"
#include "rgw_common.h"
#include "rgw_acl.h"
#include "rgw_string.h"
#include "rgw_rados.h"
+#include "rgw_http_errors.h"
#include "common/ceph_crypto.h"
#include "common/armor.h"
@@ -32,15 +34,87 @@
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rgw
-#define POLICY_ACTION 0x01
-#define POLICY_RESOURCE 0x02
-#define POLICY_ARN 0x04
-#define POLICY_STRING 0x08
+using boost::none;
+using boost::optional;
+
+using rgw::IAM::ARN;
+using rgw::IAM::Effect;
+using rgw::IAM::op_to_perm;
+using rgw::IAM::Policy;
PerfCounters *perfcounter = NULL;
const uint32_t RGWBucketInfo::NUM_SHARDS_BLIND_BUCKET(UINT32_MAX);
+// Error table consulted by set_req_state_err() for every request (after
+// the Swift table when RGW_REST_SWIFT is set). Each entry maps a positive
+// error number to { HTTP status, error code string }; search_err() copies
+// those into rgw_err::http_ret and rgw_err::s3_code.
+rgw_http_errors rgw_http_s3_errors({
+ { 0, {200, "" }},
+ { STATUS_CREATED, {201, "Created" }},
+ { STATUS_ACCEPTED, {202, "Accepted" }},
+ { STATUS_NO_CONTENT, {204, "NoContent" }},
+ { STATUS_PARTIAL_CONTENT, {206, "" }},
+ { ERR_PERMANENT_REDIRECT, {301, "PermanentRedirect" }},
+ { ERR_WEBSITE_REDIRECT, {301, "WebsiteRedirect" }},
+ { STATUS_REDIRECT, {303, "" }},
+ { ERR_NOT_MODIFIED, {304, "NotModified" }},
+ { EINVAL, {400, "InvalidArgument" }},
+ { ERR_INVALID_REQUEST, {400, "InvalidRequest" }},
+ { ERR_INVALID_DIGEST, {400, "InvalidDigest" }},
+ { ERR_BAD_DIGEST, {400, "BadDigest" }},
+ { ERR_INVALID_BUCKET_NAME, {400, "InvalidBucketName" }},
+ { ERR_INVALID_OBJECT_NAME, {400, "InvalidObjectName" }},
+ { ERR_UNRESOLVABLE_EMAIL, {400, "UnresolvableGrantByEmailAddress" }},
+ { ERR_INVALID_PART, {400, "InvalidPart" }},
+ { ERR_INVALID_PART_ORDER, {400, "InvalidPartOrder" }},
+ { ERR_REQUEST_TIMEOUT, {400, "RequestTimeout" }},
+ { ERR_TOO_LARGE, {400, "EntityTooLarge" }},
+ { ERR_TOO_SMALL, {400, "EntityTooSmall" }},
+ { ERR_TOO_MANY_BUCKETS, {400, "TooManyBuckets" }},
+ { ERR_MALFORMED_XML, {400, "MalformedXML" }},
+ { ERR_AMZ_CONTENT_SHA256_MISMATCH, {400, "XAmzContentSHA256Mismatch" }},
+ { ERR_LENGTH_REQUIRED, {411, "MissingContentLength" }},
+ { EACCES, {403, "AccessDenied" }},
+ { EPERM, {403, "AccessDenied" }},
+ { ERR_SIGNATURE_NO_MATCH, {403, "SignatureDoesNotMatch" }},
+ { ERR_INVALID_ACCESS_KEY, {403, "InvalidAccessKeyId" }},
+ { ERR_USER_SUSPENDED, {403, "UserSuspended" }},
+ { ERR_REQUEST_TIME_SKEWED, {403, "RequestTimeTooSkewed" }},
+ { ERR_QUOTA_EXCEEDED, {403, "QuotaExceeded" }},
+ { ENOENT, {404, "NoSuchKey" }},
+ { ERR_NO_SUCH_BUCKET, {404, "NoSuchBucket" }},
+ { ERR_NO_SUCH_WEBSITE_CONFIGURATION, {404, "NoSuchWebsiteConfiguration" }},
+ { ERR_NO_SUCH_UPLOAD, {404, "NoSuchUpload" }},
+ { ERR_NOT_FOUND, {404, "Not Found"}},
+ { ERR_NO_SUCH_LC, {404, "NoSuchLifecycleConfiguration"}},
+ { ERR_METHOD_NOT_ALLOWED, {405, "MethodNotAllowed" }},
+ { ETIMEDOUT, {408, "RequestTimeout" }},
+ { EEXIST, {409, "BucketAlreadyExists" }},
+ { ERR_USER_EXIST, {409, "UserAlreadyExists" }},
+ { ERR_EMAIL_EXIST, {409, "EmailExists" }},
+ { ERR_KEY_EXIST, {409, "KeyExists"}},
+ { ERR_INVALID_SECRET_KEY, {400, "InvalidSecretKey"}},
+ { ERR_INVALID_KEY_TYPE, {400, "InvalidKeyType"}},
+ { ERR_INVALID_CAP, {400, "InvalidCapability"}},
+ { ERR_INVALID_TENANT_NAME, {400, "InvalidTenantName" }},
+ { ENOTEMPTY, {409, "BucketNotEmpty" }},
+ { ERR_PRECONDITION_FAILED, {412, "PreconditionFailed" }},
+ { ERANGE, {416, "InvalidRange" }},
+ { ERR_UNPROCESSABLE_ENTITY, {422, "UnprocessableEntity" }},
+ { ERR_LOCKED, {423, "Locked" }},
+ { ERR_INTERNAL_ERROR, {500, "InternalError" }},
+ { ERR_NOT_IMPLEMENTED, {501, "NotImplemented" }},
+ { ERR_SERVICE_UNAVAILABLE, {503, "ServiceUnavailable"}},
+});
+
+// Swift-specific table, searched first by set_req_state_err() when
+// RGW_REST_SWIFT is set in the protocol flags; error numbers not listed
+// here fall through to rgw_http_s3_errors.
+rgw_http_errors rgw_http_swift_errors({
+ { EACCES, {403, "AccessDenied" }},
+ { EPERM, {401, "AccessDenied" }},
+ { ERR_USER_SUSPENDED, {401, "UserSuspended" }},
+ { ERR_INVALID_UTF8, {412, "Invalid UTF8" }},
+ { ERR_BAD_URL, {412, "Bad URL" }},
+ { ERR_NOT_SLO_MANIFEST, {400, "Not an SLO manifest" }},
+ { ERR_QUOTA_EXCEEDED, {413, "QuotaExceeded" }},
+});
+
int rgw_perf_start(CephContext *cct)
{
PerfCountersBuilder plb(cct, cct->_conf->name.to_str(), l_rgw_first, l_rgw_last);
@@ -84,12 +158,6 @@ rgw_err()
clear();
}
-rgw_err::
-rgw_err(int http, const std::string& s3)
- : http_ret(http), ret(0), s3_code(s3)
-{
-}
-
void rgw_err::
clear()
{
@@ -224,6 +292,77 @@ req_state::~req_state() {
delete object_acl;
}
+// Look up err_no in errs.  On a hit the mapped error-code string is copied
+// into `code` and, unless is_website_redirect is set, the mapped HTTP status
+// is copied into `http_ret`.  Returns whether an entry was found; on a miss
+// neither output is touched.
+bool search_err(rgw_http_errors& errs, int err_no, bool is_website_redirect, int& http_ret, string& code)
+{
+  const auto entry = errs.find(err_no);
+  if (entry == errs.end()) {
+    return false;
+  }
+
+  const auto& mapped = entry->second;
+  if (!is_website_redirect) {
+    http_ret = mapped.first;
+  }
+  code = mapped.second;
+  return true;
+}
+
+/*
+ * Translate err_no into err.ret, err.http_ret and err.s3_code.
+ *
+ * err_no may be passed with either sign: the tables are searched with the
+ * positive value and err.ret always receives the negative one.  Requests
+ * flagged RGW_REST_SWIFT try the Swift table first; everything else (and any
+ * Swift miss) goes through the S3 table.  Unknown errors become a 500
+ * "UnknownError".
+ */
+void set_req_state_err(struct rgw_err& err,	/* out */
+			int err_no,		/* in  */
+			const int prot_flags)	/* in  */
+{
+  const int abs_err = (err_no < 0) ? -err_no : err_no;
+  err.ret = -abs_err;
+
+  if ((prot_flags & RGW_REST_SWIFT) &&
+      search_err(rgw_http_swift_errors, abs_err, false, err.http_ret, err.s3_code)) {
+    return;
+  }
+
+  //Default to searching in s3 errors
+  // For a website redirect on a still-clear rgw_err only the code string is
+  // filled in; search_err() leaves http_ret alone in that case.
+  const bool is_website_redirect = (prot_flags & RGW_REST_WEBSITE)
+		&& abs_err == ERR_WEBSITE_REDIRECT && err.is_clear();
+  if (search_err(rgw_http_s3_errors, abs_err, is_website_redirect, err.http_ret, err.s3_code)) {
+    return;
+  }
+
+  dout(0) << "WARNING: set_req_state_err err_no=" << abs_err
+	<< " resorting to 500" << dendl;
+
+  err.http_ret = 500;
+  err.s3_code = "UnknownError";
+}
+
+// Same as the two-argument overload, and additionally records err_msg as the
+// error message of the request.  A null request state is ignored.
+void set_req_state_err(struct req_state* s, int err_no, const string& err_msg)
+{
+  if (!s) {
+    return;
+  }
+
+  set_req_state_err(s, err_no);
+  s->err.message = err_msg;
+}
+
+// Fill s->err for err_no using the protocol flags of the request.  A null
+// request state is ignored.
+void set_req_state_err(struct req_state* s, int err_no)
+{
+  if (!s) {
+    return;
+  }
+
+  set_req_state_err(s->err, err_no, s->prot_flags);
+}
+
+// Write the error recorded in s->err through the request's formatter.  The
+// fields are wrapped in an "Error" section for every format except HTML;
+// Code, Message, BucketName and RequestId are only emitted when non-empty,
+// HostId is always emitted.
+void dump(struct req_state* s)
+{
+  const bool wrap_in_section = (s->format != RGW_FORMAT_HTML);
+
+  if (wrap_in_section) {
+    s->formatter->open_object_section("Error");
+  }
+
+  if (!s->err.s3_code.empty()) {
+    s->formatter->dump_string("Code", s->err.s3_code);
+  }
+  if (!s->err.message.empty()) {
+    s->formatter->dump_string("Message", s->err.message);
+  }
+  if (!s->bucket_name.empty()) {	// TODO: connect to expose_bucket
+    s->formatter->dump_string("BucketName", s->bucket_name);
+  }
+  if (!s->trans_id.empty()) {	// TODO: connect to expose_bucket or another toggle
+    s->formatter->dump_string("RequestId", s->trans_id);
+  }
+  s->formatter->dump_string("HostId", s->host_id);
+
+  if (wrap_in_section) {
+    s->formatter->close_section();
+  }
+}
+
struct str_len {
const char *str;
int len;
@@ -938,17 +1077,39 @@ bool verify_requester_payer_permission(struct req_state *s)
}
bool verify_bucket_permission(struct req_state * const s,
+ const rgw_bucket& bucket,
RGWAccessControlPolicy * const user_acl,
RGWAccessControlPolicy * const bucket_acl,
- const int perm)
+ const optional<Policy>& bucket_policy,
+ const uint64_t op)
{
- if (!bucket_acl)
+ if (!verify_requester_payer_permission(s))
return false;
- if ((perm & (int)s->perm_mask) != perm)
+ if (bucket_policy) {
+ auto r = bucket_policy->eval(s->env, *s->auth.identity, op, ARN(bucket));
+ if (r == Effect::Allow)
+ // It looks like S3 ACLs only GRANT permissions rather than
+ // denying them, so this should be safe.
+ return true;
+ else if (r == Effect::Deny)
+ return false;
+ }
+
+ const auto perm = op_to_perm(op);
+
+ return verify_bucket_permission_no_policy(s, user_acl, bucket_acl, perm);
+}
+
+bool verify_bucket_permission_no_policy(struct req_state * const s,
+ RGWAccessControlPolicy * const user_acl,
+ RGWAccessControlPolicy * const bucket_acl,
+ const int perm)
+{
+ if (!bucket_acl)
return false;
- if (!verify_requester_payer_permission(s))
+ if ((perm & (int)s->perm_mask) != perm)
return false;
if (bucket_acl->verify_permission(*s->auth.identity, perm, perm,
@@ -961,35 +1122,76 @@ bool verify_bucket_permission(struct req_state * const s,
return user_acl->verify_permission(*s->auth.identity, perm, perm);
}
-bool verify_bucket_permission(struct req_state * const s, const int perm)
+// ACL-only bucket check against the user and bucket ACLs already attached to
+// the request; the requester-pays check runs first.
+bool verify_bucket_permission_no_policy(struct req_state * const s, const int perm)
+{
+  if (verify_requester_payer_permission(s)) {
+    return verify_bucket_permission_no_policy(s, s->user_acl.get(),
+                                              s->bucket_acl, perm);
+  }
+  return false;
+}
+
+bool verify_bucket_permission(struct req_state * const s, const uint64_t op)
{
return verify_bucket_permission(s,
+ s->bucket,
s->user_acl.get(),
s->bucket_acl,
- perm);
+ s->iam_policy,
+ op);
+}
+
+// True only when the request's defer_to_bucket_acls mode equals
+// deferred_check and the bucket-level check (policy, then ACLs) allows op.
+static inline bool check_deferred_bucket_perms(struct req_state * const s,
+					       const rgw_bucket& bucket,
+					       RGWAccessControlPolicy * const user_acl,
+					       RGWAccessControlPolicy * const bucket_acl,
+					       const optional<Policy>& bucket_policy,
+					       const uint8_t deferred_check,
+					       const uint64_t op)
+{
+  if (s->defer_to_bucket_acls != deferred_check) {
+    return false;
+  }
+  return verify_bucket_permission(s, bucket, user_acl, bucket_acl, bucket_policy, op);
+}
-static inline bool check_deferred_bucket_acl(struct req_state * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
- const uint8_t deferred_check,
- const int perm)
+static inline bool check_deferred_bucket_only_acl(struct req_state * const s,
+ RGWAccessControlPolicy * const user_acl,
+ RGWAccessControlPolicy * const bucket_acl,
+ const uint8_t deferred_check,
+ const int perm)
{
return (s->defer_to_bucket_acls == deferred_check \
- && verify_bucket_permission(s, user_acl, bucket_acl, perm));
+ && verify_bucket_permission_no_policy(s, user_acl, bucket_acl, perm));
}
bool verify_object_permission(struct req_state * const s,
+ const rgw_obj& obj,
RGWAccessControlPolicy * const user_acl,
RGWAccessControlPolicy * const bucket_acl,
RGWAccessControlPolicy * const object_acl,
- const int perm)
+ const optional<Policy>& bucket_policy,
+ const uint64_t op)
{
if (!verify_requester_payer_permission(s))
return false;
- if (check_deferred_bucket_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, perm) ||
- check_deferred_bucket_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, RGW_PERM_FULL_CONTROL)) {
+ if (bucket_policy) {
+ auto r = bucket_policy->eval(s->env, *s->auth.identity, op, ARN(obj));
+ if (r == Effect::Allow)
+ // It looks like S3 ACLs only GRANT permissions rather than
+ // denying them, so this should be safe.
+ return true;
+ else if (r == Effect::Deny)
+ return false;
+ }
+
+ const auto perm = op_to_perm(op);
+
+ if (check_deferred_bucket_perms(s, obj.bucket, user_acl, bucket_acl, bucket_policy,
+ RGW_DEFER_TO_BUCKET_ACLS_RECURSE, op) ||
+ check_deferred_bucket_perms(s, obj.bucket, user_acl, bucket_acl, bucket_policy,
+ RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, rgw::IAM::s3All)) {
return true;
}
@@ -1029,13 +1231,72 @@ bool verify_object_permission(struct req_state * const s,
return user_acl->verify_permission(*s->auth.identity, swift_perm, swift_perm);
}
-bool verify_object_permission(struct req_state *s, int perm)
+/*
+ * ACL-only object permission check (no bucket policy evaluation).
+ *
+ * Order of evaluation:
+ *  1. if the request defers to bucket ACLs (RECURSE with `perm`, or
+ *     FULL_CONTROL with RGW_PERM_FULL_CONTROL) and the bucket-level check
+ *     passes, access is granted;
+ *  2. otherwise the object ACL decides; a missing object ACL denies;
+ *  3. if the object ACL denies and rgw_enforce_swift_acls is set, `perm` is
+ *     mapped onto the Swift READ_OBJS/WRITE_OBJS bits and checked against
+ *     the bucket ACL and then the user ACL.
+ *
+ * A null bucket_acl or user_acl in step 3 denies access instead of being
+ * dereferenced, matching verify_bucket_permission_no_policy().
+ */
+bool verify_object_permission_no_policy(struct req_state * const s,
+					RGWAccessControlPolicy * const user_acl,
+					RGWAccessControlPolicy * const bucket_acl,
+					RGWAccessControlPolicy * const object_acl,
+					const int perm)
+{
+  if (check_deferred_bucket_only_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, perm) ||
+      check_deferred_bucket_only_acl(s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, RGW_PERM_FULL_CONTROL)) {
+    return true;
+  }
+
+  if (!object_acl) {
+    return false;
+  }
+
+  if (object_acl->verify_permission(*s->auth.identity, s->perm_mask, perm)) {
+    return true;
+  }
+
+  if (!s->cct->_conf->rgw_enforce_swift_acls)
+    return false;
+
+  if ((perm & (int)s->perm_mask) != perm)
+    return false;
+
+  int swift_perm = 0;
+  if (perm & (RGW_PERM_READ | RGW_PERM_READ_ACP))
+    swift_perm |= RGW_PERM_READ_OBJS;
+  if (perm & RGW_PERM_WRITE)
+    swift_perm |= RGW_PERM_WRITE_OBJS;
+
+  if (!swift_perm)
+    return false;
+
+  /* the bucket ACL may legitimately be absent; treat that as "no grant"
+     rather than dereferencing a null pointer */
+  if (!bucket_acl)
+    return false;
+
+  /* we already verified the user mask above, so we pass swift_perm as the mask here,
+     otherwise the mask might not cover the swift permissions bits */
+  if (bucket_acl->verify_permission(*s->auth.identity, swift_perm, swift_perm,
+                                    s->info.env->get("HTTP_REFERER")))
+    return true;
+
+  if (!user_acl)
+    return false;
+
+  return user_acl->verify_permission(*s->auth.identity, swift_perm, swift_perm);
+}
+
+// ACL-only object check against the user, bucket and object ACLs already
+// attached to the request; the requester-pays check runs first.
+bool verify_object_permission_no_policy(struct req_state *s, int perm)
+{
+  if (verify_requester_payer_permission(s)) {
+    return verify_object_permission_no_policy(s,
+                                              s->user_acl.get(),
+                                              s->bucket_acl,
+                                              s->object_acl,
+                                              perm);
+  }
+  return false;
+}
+
+bool verify_object_permission(struct req_state *s, uint64_t op)
{
return verify_object_permission(s,
- s->user_acl.get(),
+ rgw_obj(s->bucket, s->object),
+ s->user_acl.get(),
s->bucket_acl,
s->object_acl,
- perm);
+ s->iam_policy,
+ op);
}
class HexTable
@@ -1613,7 +1874,7 @@ static int matchignorecase(const char& c1, const char& c2)
return 0;
}
-int match(const string& pattern, const string& input, int flag)
+int match(const string& pattern, const string& input, uint32_t flag)
{
auto last_pos_input = 0, last_pos_pattern = 0;
@@ -1625,7 +1886,9 @@ int match(const string& pattern, const string& input, int flag)
string substr_pattern = pattern.substr(last_pos_pattern, cur_pos_pattern);
int res;
- if (flag & POLICY_ACTION || flag & POLICY_ARN) {
+ if (substr_pattern == "*") {
+ res = 1;
+ } else if (flag & MATCH_POLICY_ACTION || flag & MATCH_POLICY_ARN) {
res = match_internal(substr_pattern, substr_input, &matchignorecase);
} else {
res = match_internal(substr_pattern, substr_input, &matchcase);
@@ -1636,7 +1899,7 @@ int match(const string& pattern, const string& input, int flag)
if (cur_pos_pattern == string::npos && cur_pos_input == string::npos)
return 1;
else if ((cur_pos_pattern == string::npos && cur_pos_input != string::npos) ||
- (cur_pos_pattern != string::npos && cur_pos_input == string::npos))
+ (cur_pos_pattern != string::npos && cur_pos_input == string::npos))
return 0;
last_pos_pattern = cur_pos_pattern + 1;
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 5ad7637b44c..6e24bb1d419 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -21,6 +21,7 @@
#include "acconfig.h"
#include "rgw_acl.h"
#include "rgw_cors.h"
+#include "rgw_iam_policy.h"
#include "rgw_quota.h"
#include "rgw_string.h"
#include "rgw_website.h"
@@ -29,8 +30,6 @@
#include "cls/rgw/cls_rgw_types.h"
#include "include/rados/librados.hpp"
-using namespace std;
-
namespace ceph {
class Formatter;
}
@@ -99,6 +98,10 @@ using ceph::crypto::MD5;
#define RGW_ATTR_COMPRESSION RGW_ATTR_PREFIX "compression"
+/* IAM Policy */
+#define RGW_ATTR_IAM_POLICY RGW_ATTR_PREFIX "iam-policy"
+
+
/* RGW File Attributes */
#define RGW_ATTR_UNIX_KEY1 RGW_ATTR_PREFIX "unix-key1"
#define RGW_ATTR_UNIX1 RGW_ATTR_PREFIX "unix1"
@@ -202,6 +205,8 @@ using ceph::crypto::MD5;
#define UINT32_MAX (0xffffffffu)
#endif
+struct req_state;
+
typedef void *RGWAccessHandle;
@@ -263,7 +268,6 @@ enum RGWObjCategory {
/** Store error returns for output at a different point in the program */
struct rgw_err {
rgw_err();
- rgw_err(int http, const std::string &s3);
void clear();
bool is_clear() const;
bool is_err() const;
@@ -275,6 +279,8 @@ struct rgw_err {
std::string message;
};
+
+
/* Helper class used for RGWHTTPArgs parsing */
class NameVal
{
@@ -451,6 +457,9 @@ enum RGWOpType {
RGW_OP_GET_ROLE_POLICY,
RGW_OP_LIST_ROLE_POLICIES,
RGW_OP_DELETE_ROLE_POLICY,
+ RGW_OP_PUT_BUCKET_POLICY,
+ RGW_OP_GET_BUCKET_POLICY,
+ RGW_OP_DELETE_BUCKET_POLICY,
/* rgw specific */
RGW_OP_ADMIN_SET_METADATA,
@@ -1338,8 +1347,6 @@ struct RGWStorageStats
void dump(Formatter *f) const;
};
-struct req_state;
-
class RGWEnv;
/* Namespaced forward declarations. */
@@ -1348,12 +1355,14 @@ namespace rgw {
namespace s3 {
class RGWGetPolicyV2Extractor;
}
+ class Completer;
}
namespace io {
class BasicClient;
}
}
+
struct req_info {
RGWEnv *env;
RGWHTTPArgs args;
@@ -1701,7 +1710,7 @@ struct req_state {
const char *length;
int64_t content_length;
map<string, string> generic_attrs;
- struct rgw_err err;
+ rgw_err err;
bool expect_cont;
bool header_ended;
uint64_t obj_size;
@@ -1770,6 +1779,9 @@ struct req_state {
RGWAccessControlPolicy *bucket_acl;
RGWAccessControlPolicy *object_acl;
+ rgw::IAM::Environment env;
+ boost::optional<rgw::IAM::Policy> iam_policy;
+
/* Is the request made by an user marked as a system one?
* Being system user means we also have the admin status. */
bool system_request;
@@ -1805,8 +1817,15 @@ struct req_state {
req_state(CephContext* _cct, RGWEnv* e, RGWUserInfo* u);
~req_state();
+
+ bool is_err() const { return err.is_err(); }
};
+/* Error reporting helpers: map an error number onto the HTTP status and
+ * error code kept in the request state (or directly in an rgw_err, given the
+ * protocol flags), optionally attach a message, and dump the recorded error
+ * through the request's formatter.  std::string is spelled out because this
+ * header no longer pulls in namespace std itself. */
+void set_req_state_err(struct req_state*, int);
+void set_req_state_err(struct req_state*, int, const std::string&);
+void set_req_state_err(struct rgw_err&, int, const int);
+void dump(struct req_state*);
+
/** Store basic data on bucket */
struct RGWBucketEnt {
rgw_bucket bucket;
@@ -2132,17 +2151,38 @@ bool verify_user_permission(struct req_state * const s,
const int perm);
bool verify_user_permission(struct req_state * const s,
const int perm);
-extern bool verify_bucket_permission(struct req_state * s,
- RGWAccessControlPolicy * user_acl,
- RGWAccessControlPolicy * bucket_acl,
- int perm);
-extern bool verify_bucket_permission(struct req_state *s, int perm);
-extern bool verify_object_permission(struct req_state *s,
- RGWAccessControlPolicy * user_acl,
- RGWAccessControlPolicy * bucket_acl,
- RGWAccessControlPolicy * object_acl,
- int perm);
-extern bool verify_object_permission(struct req_state *s, int perm);
+/* Bucket-level checks.  The `op` overloads consult the bucket policy first
+ * and fall back to the ACLs; the `_no_policy` overloads look at ACLs only
+ * and take an RGW_PERM_* mask. */
+bool verify_bucket_permission(struct req_state * const s,
+                              const rgw_bucket& bucket,
+                              RGWAccessControlPolicy * const user_acl,
+                              RGWAccessControlPolicy * const bucket_acl,
+                              const boost::optional<rgw::IAM::Policy>& bucket_policy,
+                              const uint64_t op);
+bool verify_bucket_permission(struct req_state * const s, const uint64_t op);
+bool verify_bucket_permission_no_policy(struct req_state * const s,
+                                        RGWAccessControlPolicy * const user_acl,
+                                        RGWAccessControlPolicy * const bucket_acl,
+                                        const int perm);
+bool verify_bucket_permission_no_policy(struct req_state * const s,
+                                        const int perm);
+
+/* Object-level counterparts of the bucket checks above. */
+bool verify_object_permission(struct req_state * const s,
+                              const rgw_obj& obj,
+                              RGWAccessControlPolicy * const user_acl,
+                              RGWAccessControlPolicy * const bucket_acl,
+                              RGWAccessControlPolicy * const object_acl,
+                              const boost::optional<rgw::IAM::Policy>& bucket_policy,
+                              const uint64_t op);
+bool verify_object_permission(struct req_state *s, uint64_t op);
+bool verify_object_permission_no_policy(struct req_state * const s,
+                                        RGWAccessControlPolicy * const user_acl,
+                                        RGWAccessControlPolicy * const bucket_acl,
+                                        RGWAccessControlPolicy * const object_acl,
+                                        int perm);
+bool verify_object_permission_no_policy(struct req_state *s, int perm);
/** Convert an input URL into a sane object name
* by converting %-escaped strings into characters, etc*/
extern void rgw_uri_escape_char(char c, string& dst);
@@ -2168,5 +2208,12 @@ extern string calc_hash_sha256_close_stream(SHA256 **hash);
extern int rgw_parse_op_type_list(const string& str, uint32_t *perm);
-int match(const string& pattern, const string& input, int flag);
+/* Flags accepted by match().  MATCH_POLICY_ACTION and MATCH_POLICY_ARN make
+ * the comparison case-insensitive; the other flags compare case-sensitively.
+ * Namespace-scope constexpr objects already have internal linkage, so no
+ * unnamed namespace is needed (and none belongs in a header). */
+constexpr uint32_t MATCH_POLICY_ACTION = 0x01;
+constexpr uint32_t MATCH_POLICY_RESOURCE = 0x02;
+constexpr uint32_t MATCH_POLICY_ARN = 0x04;
+constexpr uint32_t MATCH_POLICY_STRING = 0x08;
+
+int match(const std::string& pattern, const std::string& input, uint32_t flag);
#endif
diff --git a/src/rgw/rgw_coroutine.cc b/src/rgw/rgw_coroutine.cc
index 85ad290d020..4bb48da49dc 100644
--- a/src/rgw/rgw_coroutine.cc
+++ b/src/rgw/rgw_coroutine.cc
@@ -1,10 +1,11 @@
-
#include "common/ceph_json.h"
#include "rgw_coroutine.h"
#include "rgw_boost_asio_yield.h"
+// re-include our assert to clobber the system one; fix dout:
+#include "include/assert.h"
#define dout_subsys ceph_subsys_rgw
@@ -68,7 +69,7 @@ int RGWCompletionManager::get_next(void **user_info)
Mutex::Locker l(lock);
while (complete_reqs.empty()) {
cond.Wait(lock);
- if (going_down.read() != 0) {
+ if (going_down) {
return -ECANCELED;
}
}
@@ -94,7 +95,7 @@ void RGWCompletionManager::go_down()
for (auto cn : cns) {
cn->unregister();
}
- going_down.set(1);
+ going_down = true;
cond.Signal();
}
@@ -460,7 +461,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks)
bool canceled = false; // set on going_down
RGWCoroutinesEnv env;
- uint64_t run_context = run_context_count.inc();
+ uint64_t run_context = ++run_context_count;
lock.get_write();
set<RGWCoroutinesStack *>& context_stacks = run_contexts[run_context];
@@ -475,7 +476,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks)
env.manager = this;
env.scheduled_stacks = &scheduled_stacks;
- for (list<RGWCoroutinesStack *>::iterator iter = scheduled_stacks.begin(); iter != scheduled_stacks.end() && !going_down.read();) {
+ for (list<RGWCoroutinesStack *>::iterator iter = scheduled_stacks.begin(); iter != scheduled_stacks.end() && !going_down;) {
lock.get_write();
RGWCoroutinesStack *stack = *iter;
@@ -566,7 +567,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks)
if (ret < 0) {
ldout(cct, 0) << "ERROR: failed to clone shard, completion_mgr.get_next() returned ret=" << ret << dendl;
}
- if (going_down.read() > 0) {
+ if (going_down) {
ldout(cct, 5) << __func__ << "(): was stopped, exiting" << dendl;
ret = -ECANCELED;
canceled = true;
@@ -585,7 +586,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks)
}
lock.get_write();
- if (!context_stacks.empty() && !going_down.read()) {
+ if (!context_stacks.empty() && !going_down) {
JSONFormatter formatter(true);
formatter.open_array_section("context_stacks");
for (auto& s : context_stacks) {
@@ -595,7 +596,7 @@ int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks)
lderr(cct) << __func__ << "(): ERROR: deadlock detected, dumping remaining coroutines:\n";
formatter.flush(*_dout);
*_dout << dendl;
- assert(context_stacks.empty() || going_down.read()); // assert on deadlock
+ assert(context_stacks.empty() || going_down); // assert on deadlock
}
for (auto stack : context_stacks) {
diff --git a/src/rgw/rgw_coroutine.h b/src/rgw/rgw_coroutine.h
index 821ff55889a..66afa74b375 100644
--- a/src/rgw/rgw_coroutine.h
+++ b/src/rgw/rgw_coroutine.h
@@ -22,6 +22,8 @@
#include "rgw_common.h"
#include "rgw_boost_asio_coroutine.h"
+#include <atomic>
+
#define RGW_ASYNC_OPS_MGR_WINDOW 100
class RGWCoroutinesStack;
@@ -39,7 +41,7 @@ class RGWCompletionManager : public RefCountedObject {
SafeTimer timer;
- atomic_t going_down;
+ std::atomic<bool> going_down = { false };
map<void *, void *> waiters;
@@ -506,9 +508,9 @@ public:
class RGWCoroutinesManager {
CephContext *cct;
- atomic_t going_down;
+ std::atomic<bool> going_down = { false };
- atomic64_t run_context_count;
+ std::atomic<int64_t> run_context_count = { 0 };
map<uint64_t, set<RGWCoroutinesStack *> > run_contexts;
RWLock lock;
@@ -542,7 +544,8 @@ public:
int run(list<RGWCoroutinesStack *>& ops);
int run(RGWCoroutine *op);
void stop() {
- if (going_down.inc() == 1) {
+ bool expected = false;
+ if (going_down.compare_exchange_strong(expected, true)) {
completion_mgr->go_down();
}
}
diff --git a/src/rgw/rgw_cors_s3.h b/src/rgw/rgw_cors_s3.h
index cad423c10a2..1e60e651512 100644
--- a/src/rgw/rgw_cors_s3.h
+++ b/src/rgw/rgw_cors_s3.h
@@ -18,15 +18,12 @@
#include <map>
#include <string>
#include <iosfwd>
-#include <expat.h>
#include <include/types.h>
#include <common/Formatter.h>
#include "rgw_xml.h"
#include "rgw_cors.h"
-using namespace std;
-
class RGWCORSRule_S3 : public RGWCORSRule, public XMLObj
{
public:
diff --git a/src/rgw/rgw_cors_swift.h b/src/rgw/rgw_cors_swift.h
index 6aef5e13561..d2516dd0357 100644
--- a/src/rgw/rgw_cors_swift.h
+++ b/src/rgw/rgw_cors_swift.h
@@ -23,8 +23,6 @@
#include "rgw_cors.h"
-using namespace std;
-
class RGWCORSConfiguration_SWIFT : public RGWCORSConfiguration
{
public:
diff --git a/src/rgw/rgw_cr_rados.cc b/src/rgw/rgw_cr_rados.cc
index 489ef589172..0e0be84c82c 100644
--- a/src/rgw/rgw_cr_rados.cc
+++ b/src/rgw/rgw_cr_rados.cc
@@ -65,7 +65,7 @@ void RGWAsyncRadosProcessor::start() {
}
void RGWAsyncRadosProcessor::stop() {
- going_down.set(1);
+ going_down = true;
m_tp.drain(&req_wq);
m_tp.stop();
for (auto iter = m_req_queue.begin(); iter != m_req_queue.end(); ++iter) {
@@ -116,14 +116,14 @@ int RGWSimpleRadosReadAttrsCR::request_complete()
int RGWAsyncPutSystemObj::_send_request()
{
- return store->put_system_obj_data(NULL, obj, bl, -1, exclusive);
+ return store->put_system_obj_data(NULL, obj, bl, -1, exclusive, objv_tracker);
}
RGWAsyncPutSystemObj::RGWAsyncPutSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
- const rgw_raw_obj& _obj, bool _exclusive,
- bufferlist& _bl) : RGWAsyncRadosRequest(caller, cn), store(_store),
- obj(_obj), exclusive(_exclusive),
- bl(_bl)
+ RGWObjVersionTracker *_objv_tracker, rgw_raw_obj& _obj,
+ bool _exclusive, bufferlist& _bl)
+ : RGWAsyncRadosRequest(caller, cn), store(_store), objv_tracker(_objv_tracker),
+ obj(_obj), exclusive(_exclusive), bl(_bl)
{
}
@@ -315,6 +315,40 @@ int RGWRadosRemoveOmapKeysCR::send_request() {
return ref.ioctx.aio_operate(ref.oid, cn->completion(), &op);
}
+// Remember the store and the raw object to delete, and label the coroutine
+// with its target.
+RGWRadosRemoveCR::RGWRadosRemoveCR(RGWRados *store, const rgw_raw_obj& obj)
+  : RGWSimpleCoroutine(store->ctx()),
+    store(store),
+    obj(obj)
+{
+  set_description() << "remove dest=" << obj;
+}
+
+// Open an IoCtx on the object's pool and submit an asynchronous remove for
+// it.  Returns a negative value if the pool cannot be opened, otherwise the
+// result of aio_operate().
+int RGWRadosRemoveCR::send_request()
+{
+  const auto& pool_name = obj.pool.name;
+
+  const int ret = store->get_rados_handle()->ioctx_create(pool_name.c_str(), ioctx);
+  if (ret < 0) {
+    lderr(cct) << "ERROR: failed to open pool (" << pool_name << ") ret=" << ret << dendl;
+    return ret;
+  }
+  ioctx.locator_set_key(obj.loc);
+
+  set_status() << "send request";
+
+  librados::ObjectWriteOperation remove_op;
+  remove_op.remove();
+
+  cn = stack->create_completion_notifier();
+  return ioctx.aio_operate(obj.oid, cn->completion(), &remove_op);
+}
+
+// Report the return value of the completed remove, recording it in the
+// coroutine status first.
+int RGWRadosRemoveCR::request_complete()
+{
+  const int ret = cn->completion()->get_return_value();
+  set_status() << "request complete; ret=" << ret;
+  return ret;
+}
+
RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
const rgw_raw_obj& _obj,
const string& _lock_name,
@@ -507,7 +541,6 @@ int RGWAsyncFetchRemoteObj::_send_request()
&key.instance, /* string *version_id, */
NULL, /* string *ptag, */
NULL, /* string *petag, */
- NULL, /* struct rgw_err *err, */
NULL, /* void (*progress_cb)(off_t, void *), */
NULL); /* void *progress_data*); */
@@ -627,7 +660,7 @@ int RGWContinuousLeaseCR::operate()
return set_cr_done();
}
reenter(this) {
- while (!going_down.read()) {
+ while (!going_down) {
yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, interval));
caller->set_sleeping(false); /* will only be relevant when we return, that's why we can do it early */
@@ -722,6 +755,29 @@ int RGWRadosTimelogTrimCR::request_complete()
return r;
}
+
+// Trim `oid` up to to_marker: the base coroutine is given default-constructed
+// time bounds and an empty start marker.  last_trim_marker is the caller's
+// marker that request_complete() advances.
+RGWSyncLogTrimCR::RGWSyncLogTrimCR(RGWRados *store,
+                                   const std::string& oid,
+                                   const std::string& to_marker,
+                                   std::string *last_trim_marker)
+  : RGWRadosTimelogTrimCR(store, oid,
+                          real_time{}, real_time{},
+                          std::string{}, to_marker),
+    cct(store->ctx()),
+    last_trim_marker(last_trim_marker)
+{
+}
+
+// Finish the trim.  Any failure other than -ENODATA is returned as is;
+// otherwise the caller's marker is moved forward to to_marker (never
+// backwards) and 0 is returned.
+int RGWSyncLogTrimCR::request_complete()
+{
+  const int ret = RGWRadosTimelogTrimCR::request_complete();
+  const bool failed = (ret < 0 && ret != -ENODATA);
+  if (failed) {
+    return ret;
+  }
+
+  std::string& marker = *last_trim_marker;
+  if (marker < to_marker) {
+    marker = to_marker;
+  }
+  return 0;
+}
+
+
int RGWAsyncStatObj::_send_request()
{
rgw_raw_obj raw_obj;
diff --git a/src/rgw/rgw_cr_rados.h b/src/rgw/rgw_cr_rados.h
index 8496201e0cd..a892b2a6db0 100644
--- a/src/rgw/rgw_cr_rados.h
+++ b/src/rgw/rgw_cr_rados.h
@@ -1,11 +1,15 @@
#ifndef CEPH_RGW_CR_RADOS_H
#define CEPH_RGW_CR_RADOS_H
+#include <boost/intrusive_ptr.hpp>
+#include "include/assert.h"
#include "rgw_coroutine.h"
#include "rgw_rados.h"
#include "common/WorkQueue.h"
#include "common/Throttle.h"
+#include <atomic>
+
class RGWAsyncRadosRequest : public RefCountedObject {
RGWCoroutine *caller;
RGWAioCompletionNotifier *notifier;
@@ -57,7 +61,7 @@ public:
class RGWAsyncRadosProcessor {
deque<RGWAsyncRadosRequest *> m_req_queue;
- atomic_t going_down;
+ std::atomic<bool> going_down = { false };
protected:
RGWRados *store;
ThreadPool m_tp;
@@ -91,7 +95,7 @@ public:
void queue(RGWAsyncRadosRequest *req);
bool is_going_down() {
- return (going_down.read() != 0);
+ return going_down;
}
};
@@ -117,6 +121,7 @@ public:
class RGWAsyncPutSystemObj : public RGWAsyncRadosRequest {
RGWRados *store;
+ RGWObjVersionTracker *objv_tracker;
rgw_raw_obj obj;
bool exclusive;
bufferlist bl;
@@ -125,8 +130,8 @@ protected:
int _send_request() override;
public:
RGWAsyncPutSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
- const rgw_raw_obj& _obj, bool _exclusive,
- bufferlist& _bl);
+ RGWObjVersionTracker *_objv_tracker, rgw_raw_obj& _obj,
+ bool _exclusive, bufferlist& _bl);
};
class RGWAsyncPutSystemObjAttrs : public RGWAsyncRadosRequest {
@@ -187,16 +192,18 @@ class RGWSimpleRadosReadCR : public RGWSimpleCoroutine {
T *result;
/// on ENOENT, call handle_data() with an empty object instead of failing
const bool empty_on_enoent;
+ RGWObjVersionTracker *objv_tracker;
RGWAsyncGetSystemObj *req{nullptr};
public:
RGWSimpleRadosReadCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
const rgw_raw_obj& _obj,
- T *_result, bool empty_on_enoent = true)
+ T *_result, bool empty_on_enoent = true,
+ RGWObjVersionTracker *objv_tracker = nullptr)
: RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados), store(_store),
obj_ctx(store), obj(_obj), result(_result),
- empty_on_enoent(empty_on_enoent) {}
+ empty_on_enoent(empty_on_enoent), objv_tracker(objv_tracker) {}
~RGWSimpleRadosReadCR() override {
request_cleanup();
}
@@ -220,7 +227,7 @@ template <class T>
int RGWSimpleRadosReadCR<T>::send_request()
{
req = new RGWAsyncGetSystemObj(this, stack->create_completion_notifier(),
- store, &obj_ctx, NULL,
+ store, &obj_ctx, objv_tracker,
obj,
&bl, 0, -1);
if (pattrs) {
@@ -303,17 +310,16 @@ class RGWSimpleRadosWriteCR : public RGWSimpleCoroutine {
bufferlist bl;
rgw_raw_obj obj;
+ RGWObjVersionTracker *objv_tracker;
- RGWAsyncPutSystemObj *req;
+ RGWAsyncPutSystemObj *req{nullptr};
public:
RGWSimpleRadosWriteCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
const rgw_raw_obj& _obj,
- const T& _data) : RGWSimpleCoroutine(_store->ctx()),
- async_rados(_async_rados),
- store(_store),
- obj(_obj),
- req(NULL) {
+ const T& _data, RGWObjVersionTracker *objv_tracker = nullptr)
+ : RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados),
+ store(_store), obj(_obj), objv_tracker(objv_tracker) {
::encode(_data, bl);
}
@@ -330,7 +336,7 @@ public:
int send_request() override {
req = new RGWAsyncPutSystemObj(this, stack->create_completion_notifier(),
- store, obj, false, bl);
+ store, objv_tracker, obj, false, bl);
async_rados->queue(req);
return 0;
}
@@ -462,6 +468,19 @@ public:
}
};
+/// Coroutine that removes one raw RADOS object: send_request() opens an
+/// IoCtx on the object's pool and submits an asynchronous remove,
+/// request_complete() reports the result of that operation.
+class RGWRadosRemoveCR : public RGWSimpleCoroutine {
+  RGWRados *store;
+  librados::IoCtx ioctx;	///< opened in send_request()
+  const rgw_raw_obj obj;	///< object to remove
+  boost::intrusive_ptr<RGWAioCompletionNotifier> cn;
+
+public:
+  RGWRadosRemoveCR(RGWRados *store, const rgw_raw_obj& obj);
+
+  // marked override like the other RGWSimpleCoroutine subclasses in this
+  // header, so a signature drift in the base class fails to compile
+  int send_request() override;
+  int request_complete() override;
+};
+
class RGWSimpleRadosLockCR : public RGWSimpleCoroutine {
RGWAsyncRadosProcessor *async_rados;
RGWRados *store;
@@ -1014,7 +1033,7 @@ class RGWContinuousLeaseCR : public RGWCoroutine {
int interval;
Mutex lock;
- atomic_t going_down;
+ std::atomic<bool> going_down = { false };
bool locked{false};
RGWCoroutine *caller;
@@ -1044,7 +1063,7 @@ public:
}
void go_down() {
- going_down.set(1);
+ going_down = true;
wakeup();
}
@@ -1091,6 +1110,16 @@ class RGWRadosTimelogTrimCR : public RGWSimpleCoroutine {
int request_complete() override;
};
+/// Timelog trim that also advances the caller's last_trim_marker once the
+/// trim has completed successfully.
+class RGWSyncLogTrimCR : public RGWRadosTimelogTrimCR {
+  CephContext *cct;
+  std::string *last_trim_marker;
+
+public:
+  RGWSyncLogTrimCR(RGWRados *store,
+                   const std::string& oid,
+                   const std::string& to_marker,
+                   std::string *last_trim_marker);
+
+  int request_complete() override;
+};
+
class RGWAsyncStatObj : public RGWAsyncRadosRequest {
RGWRados *store;
RGWBucketInfo bucket_info;
diff --git a/src/rgw/rgw_crypt.cc b/src/rgw/rgw_crypt.cc
index cf48e27751f..0d57f4da270 100644
--- a/src/rgw/rgw_crypt.cc
+++ b/src/rgw/rgw_crypt.cc
@@ -1053,7 +1053,9 @@ static const crypt_option_names crypt_options[] = {
static boost::string_ref get_crypt_attribute(
RGWEnv* env,
- map<string, post_form_part, const ltstr_nocase>* parts,
+ std::map<std::string,
+ RGWPostObj_ObjStore::post_form_part,
+ const ltstr_nocase>* parts,
crypt_option_e option)
{
static_assert(
@@ -1079,10 +1081,12 @@ static boost::string_ref get_crypt_attribute(
int rgw_s3_prepare_encrypt(struct req_state* s,
- map<string, bufferlist>& attrs,
- map<string, post_form_part, const ltstr_nocase>* parts,
- std::unique_ptr<BlockCrypt>* block_crypt,
- std::map<std::string, std::string>& crypt_http_responses)
+ std::map<std::string, ceph::bufferlist>& attrs,
+ std::map<std::string,
+ RGWPostObj_ObjStore::post_form_part,
+ const ltstr_nocase>* parts,
+ std::unique_ptr<BlockCrypt>* block_crypt,
+ std::map<std::string, std::string>& crypt_http_responses)
{
int res = 0;
crypt_http_responses.clear();
diff --git a/src/rgw/rgw_crypt.h b/src/rgw/rgw_crypt.h
index a6b7df0a42a..1774983758d 100644
--- a/src/rgw/rgw_crypt.h
+++ b/src/rgw/rgw_crypt.h
@@ -7,6 +7,7 @@
#define CEPH_RGW_CRYPT_H
#include <rgw/rgw_op.h>
+#include <rgw/rgw_rest.h>
#include <rgw/rgw_rest_s3.h>
#include <boost/utility/string_ref.hpp>
@@ -136,14 +137,18 @@ public:
int rgw_s3_prepare_encrypt(struct req_state* s,
- map<string, bufferlist>& attrs,
- map<string, post_form_part, const ltstr_nocase>* parts,
- std::unique_ptr<BlockCrypt>* block_crypt,
- std::map<std::string, std::string>& crypt_http_responses);
+ std::map<std::string, ceph::bufferlist>& attrs,
+ std::map<std::string,
+ RGWPostObj_ObjStore::post_form_part,
+ const ltstr_nocase>* parts,
+ std::unique_ptr<BlockCrypt>* block_crypt,
+ std::map<std::string,
+ std::string>& crypt_http_responses);
int rgw_s3_prepare_decrypt(struct req_state* s,
- map<string, bufferlist>& attrs,
- std::unique_ptr<BlockCrypt>* block_crypt,
- std::map<std::string, std::string>& crypt_http_responses);
+ std::map<std::string, ceph::bufferlist>& attrs,
+ std::unique_ptr<BlockCrypt>* block_crypt,
+ std::map<std::string,
+ std::string>& crypt_http_responses);
#endif
diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc
index cbe3a91d8e1..e3f3b079365 100644
--- a/src/rgw/rgw_data_sync.cc
+++ b/src/rgw/rgw_data_sync.cc
@@ -456,22 +456,25 @@ bool RGWListRemoteDataLogCR::spawn_next() {
}
class RGWInitDataSyncStatusCoroutine : public RGWCoroutine {
+ static constexpr uint32_t lock_duration = 30;
RGWDataSyncEnv *sync_env;
-
RGWRados *store;
+ const rgw_pool& pool;
+ const uint32_t num_shards;
string sync_status_oid;
string lock_name;
string cookie;
- rgw_data_sync_info status;
+ rgw_data_sync_status *status;
map<int, RGWDataChangesLogInfo> shards_info;
public:
- RGWInitDataSyncStatusCoroutine(RGWDataSyncEnv *_sync_env,
- uint32_t _num_shards) : RGWCoroutine(_sync_env->cct),
- sync_env(_sync_env), store(sync_env->store) {
+ RGWInitDataSyncStatusCoroutine(RGWDataSyncEnv *_sync_env, uint32_t num_shards,
+ rgw_data_sync_status *status)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), store(sync_env->store),
+ pool(store->get_zone_params().log_pool),
+ num_shards(num_shards), status(status) {
lock_name = "sync_lock";
- status.num_shards = _num_shards;
#define COOKIE_LEN 16
char buf[COOKIE_LEN + 1];
@@ -485,33 +488,32 @@ public:
int operate() override {
int ret;
reenter(this) {
- yield {
- uint32_t lock_duration = 30;
- call(new RGWSimpleRadosLockCR(sync_env->async_rados, store,
- rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid),
- lock_name, cookie, lock_duration));
- if (retcode < 0) {
- ldout(cct, 0) << "ERROR: failed to take a lock on " << sync_status_oid << dendl;
- return set_cr_error(retcode);
- }
+ using LockCR = RGWSimpleRadosLockCR;
+ yield call(new LockCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ lock_name, cookie, lock_duration));
+ if (retcode < 0) {
+ ldout(cct, 0) << "ERROR: failed to take a lock on " << sync_status_oid << dendl;
+ return set_cr_error(retcode);
}
- yield {
- call(new RGWSimpleRadosWriteCR<rgw_data_sync_info>(sync_env->async_rados,
- store,
- rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid),
- status));
- }
- yield { /* take lock again, we just recreated the object */
- uint32_t lock_duration = 30;
- call(new RGWSimpleRadosLockCR(sync_env->async_rados,
- store,
- rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid),
- lock_name, cookie, lock_duration));
- if (retcode < 0) {
- ldout(cct, 0) << "ERROR: failed to take a lock on " << sync_status_oid << dendl;
- return set_cr_error(retcode);
- }
+ using WriteInfoCR = RGWSimpleRadosWriteCR<rgw_data_sync_info>;
+ yield call(new WriteInfoCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ status->sync_info));
+ if (retcode < 0) {
+ ldout(cct, 0) << "ERROR: failed to write sync status info with " << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+
+ /* take lock again, we just recreated the object */
+ yield call(new LockCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ lock_name, cookie, lock_duration));
+ if (retcode < 0) {
+ ldout(cct, 0) << "ERROR: failed to take a lock on " << sync_status_oid << dendl;
+ return set_cr_error(retcode);
}
+
/* fetch current position in logs */
yield {
RGWRESTConn *conn = store->get_zone_conn_by_id(sync_env->source_zone);
@@ -519,46 +521,48 @@ public:
ldout(cct, 0) << "ERROR: connection to zone " << sync_env->source_zone << " does not exist!" << dendl;
return set_cr_error(-EIO);
}
- for (int i = 0; i < (int)status.num_shards; i++) {
+ for (uint32_t i = 0; i < num_shards; i++) {
spawn(new RGWReadRemoteDataLogShardInfoCR(sync_env, i, &shards_info[i]), true);
- }
+ }
}
while (collect(&ret, NULL)) {
- if (ret < 0) {
- return set_state(RGWCoroutine_Error);
- }
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to read remote data log shards" << dendl;
+ return set_state(RGWCoroutine_Error);
+ }
yield;
}
yield {
- for (int i = 0; i < (int)status.num_shards; i++) {
- rgw_data_sync_marker marker;
+ for (uint32_t i = 0; i < num_shards; i++) {
RGWDataChangesLogInfo& info = shards_info[i];
- marker.next_step_marker = info.marker;
- marker.timestamp = info.last_update;
- spawn(new RGWSimpleRadosWriteCR<rgw_data_sync_marker>(sync_env->async_rados, store,
- rgw_raw_obj(store->get_zone_params().log_pool, RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, i)),
- marker), true);
+ auto& marker = status->sync_markers[i];
+ marker.next_step_marker = info.marker;
+ marker.timestamp = info.last_update;
+ const auto& oid = RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, i);
+ using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_data_sync_marker>;
+ spawn(new WriteMarkerCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, oid}, marker), true);
}
}
- yield {
- status.state = rgw_data_sync_info::StateBuildingFullSyncMaps;
- call(new RGWSimpleRadosWriteCR<rgw_data_sync_info>(sync_env->async_rados, store,
- rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid),
- status));
- }
- yield { /* unlock */
- call(new RGWSimpleRadosUnlockCR(sync_env->async_rados,
- store,
- rgw_raw_obj(store->get_zone_params().log_pool, sync_status_oid),
- lock_name, cookie));
- }
while (collect(&ret, NULL)) {
- if (ret < 0) {
- return set_state(RGWCoroutine_Error);
- }
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to write data sync status markers" << dendl;
+ return set_state(RGWCoroutine_Error);
+ }
yield;
}
- drain_all();
+
+ status->sync_info.state = rgw_data_sync_info::StateBuildingFullSyncMaps;
+ yield call(new WriteInfoCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ status->sync_info));
+ if (retcode < 0) {
+ ldout(cct, 0) << "ERROR: failed to write sync status info with " << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+ yield call(new RGWSimpleRadosUnlockCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ lock_name, cookie));
return set_cr_done();
}
return 0;
@@ -666,6 +670,7 @@ int RGWRemoteDataLog::read_sync_status(rgw_data_sync_status *sync_status)
int RGWRemoteDataLog::init_sync_status(int num_shards)
{
+ rgw_data_sync_status sync_status;
RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr());
int ret = http_manager.set_threaded();
@@ -675,7 +680,7 @@ int RGWRemoteDataLog::init_sync_status(int num_shards)
}
RGWDataSyncEnv sync_env_local = sync_env;
sync_env_local.http_manager = &http_manager;
- ret = crs.run(new RGWInitDataSyncStatusCoroutine(&sync_env_local, num_shards));
+ ret = crs.run(new RGWInitDataSyncStatusCoroutine(&sync_env_local, num_shards, &sync_status));
http_manager.stop();
return ret;
}
@@ -1240,11 +1245,6 @@ public:
int incremental_sync() {
reenter(&incremental_cr) {
- error_repo = new RGWOmapAppend(sync_env->async_rados, sync_env->store,
- rgw_raw_obj(pool, error_oid),
- 1 /* no buffer */);
- error_repo->get();
- spawn(error_repo, false);
yield init_lease_cr();
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
@@ -1256,6 +1256,11 @@ public:
yield;
}
set_status("lease acquired");
+ error_repo = new RGWOmapAppend(sync_env->async_rados, sync_env->store,
+ rgw_raw_obj(pool, error_oid),
+ 1 /* no buffer */);
+ error_repo->get();
+ spawn(error_repo, false);
logger.log("inc sync");
set_marker_tracker(new RGWDataSyncShardMarkerTrack(sync_env, status_oid, sync_marker));
do {
@@ -1458,20 +1463,12 @@ public:
/* state: init status */
if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateInit) {
ldout(sync_env->cct, 20) << __func__ << "(): init" << dendl;
- yield call(new RGWInitDataSyncStatusCoroutine(sync_env, sync_status.sync_info.num_shards));
+ yield call(new RGWInitDataSyncStatusCoroutine(sync_env, num_shards, &sync_status));
if (retcode < 0) {
ldout(sync_env->cct, 0) << "ERROR: failed to init sync, retcode=" << retcode << dendl;
return set_cr_error(retcode);
}
- sync_status.sync_info.num_shards = num_shards;
- sync_status.sync_info.state = rgw_data_sync_info::StateBuildingFullSyncMaps;
- /* update new state */
- yield call(set_sync_info_cr());
-
- if (retcode < 0) {
- ldout(sync_env->cct, 0) << "ERROR: failed to write sync status, retcode=" << retcode << dendl;
- return set_cr_error(retcode);
- }
+ // sets state = StateBuildingFullSyncMaps
*reset_backoff = true;
}
@@ -2894,6 +2891,7 @@ string RGWBucketSyncStatusManager::status_oid(const string& source_zone,
}
+// TODO: move into rgw_data_sync_trim.cc
#undef dout_prefix
#define dout_prefix (*_dout << "data trim: ")
@@ -2939,28 +2937,7 @@ void take_min_markers(IterIn first, IterIn last, IterOut dest)
}
}
-// wrapper to update last_trim_marker on success
-class LastTimelogTrimCR : public RGWRadosTimelogTrimCR {
- CephContext *cct;
- std::string *last_trim_marker;
- public:
- LastTimelogTrimCR(RGWRados *store, const std::string& oid,
- const std::string& to_marker, std::string *last_trim_marker)
- : RGWRadosTimelogTrimCR(store, oid, real_time{}, real_time{},
- std::string{}, to_marker),
- cct(store->ctx()), last_trim_marker(last_trim_marker)
- {}
- int request_complete() override {
- int r = RGWRadosTimelogTrimCR::request_complete();
- if (r < 0 && r != -ENODATA) {
- ldout(cct, 1) << "failed to trim datalog: " << cpp_strerror(r) << dendl;
- return r;
- }
- ldout(cct, 10) << "datalog trimmed to marker " << to_marker << dendl;
- *last_trim_marker = to_marker;
- return 0;
- }
-};
+} // anonymous namespace
class DataLogTrimCR : public RGWCoroutine {
RGWRados *store;
@@ -3039,7 +3016,7 @@ int DataLogTrimCR::operate()
ldout(cct, 10) << "trimming log shard " << i
<< " at marker=" << stable
<< " last_trim=" << last_trim[i] << dendl;
- using TrimCR = LastTimelogTrimCR;
+ using TrimCR = RGWSyncLogTrimCR;
spawn(new TrimCR(store, store->data_log->get_oid(i),
stable, &last_trim[i]),
true);
@@ -3103,8 +3080,6 @@ int DataLogTrimPollCR::operate()
return 0;
}
-} // anonymous namespace
-
RGWCoroutine* create_data_log_trim_cr(RGWRados *store,
RGWHTTPManager *http,
int num_shards, utime_t interval)
diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc
index be3d356ec41..f79dd0a78b7 100644
--- a/src/rgw/rgw_file.cc
+++ b/src/rgw/rgw_file.cc
@@ -27,6 +27,8 @@
#include "rgw_file.h"
#include "rgw_lib_frontend.h"
+#include <atomic>
+
#define dout_subsys ceph_subsys_rgw
using namespace rgw;
@@ -37,7 +39,7 @@ namespace rgw {
const string RGWFileHandle::root_name = "/";
- atomic<uint32_t> RGWLibFS::fs_inst_counter;
+ std::atomic<uint32_t> RGWLibFS::fs_inst_counter;
uint32_t RGWLibFS::write_completion_interval_s = 10;
@@ -1148,6 +1150,8 @@ namespace rgw {
int rc = write_finish(FLAG_LOCKED);
flags &= ~FLAG_OPEN;
+ flags &= ~FLAG_STATELESS_OPEN;
+
return rc;
} /* RGWFileHandle::close */
@@ -1417,7 +1421,6 @@ int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
{
RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
fs->close();
- fs->rele();
return 0;
}
diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h
index 5d964f9c2aa..1edc37a9f60 100644
--- a/src/rgw/rgw_file.h
+++ b/src/rgw/rgw_file.h
@@ -282,7 +282,7 @@ namespace rgw {
variant_type = directory();
flags |= FLAG_BUCKET;
} else {
- bucket = (parent->flags & FLAG_BUCKET) ? parent
+ bucket = parent->is_bucket() ? parent
: parent->bucket;
if (flags & FLAG_DIRECTORY) {
fh.fh_type = RGW_FS_TYPE_DIRECTORY;
@@ -410,7 +410,7 @@ namespace rgw {
const std::string& bucket_name() const {
if (is_root())
return root_name;
- if (flags & FLAG_BUCKET)
+ if (is_bucket())
return name;
return bucket->object_name();
}
@@ -506,7 +506,7 @@ namespace rgw {
int open(uint32_t gsh_flags) {
lock_guard guard(mtx);
- if (! (flags & FLAG_OPEN)) {
+ if (! is_open()) {
if (gsh_flags & RGW_OPEN_FLAG_V3) {
flags |= FLAG_STATELESS_OPEN;
}
@@ -745,7 +745,7 @@ namespace rgw {
RGWUserInfo user;
RGWAccessKey key; // XXXX acc_key
- static atomic<uint32_t> fs_inst_counter;
+ static std::atomic<uint32_t> fs_inst_counter;
static uint32_t write_completion_interval_s;
std::string fsid;
@@ -778,7 +778,7 @@ namespace rgw {
}
void operator()() {
- rgw_fh.write_finish();
+ rgw_fh.close(); /* will finish in-progress write */
rgw_fh.get_fs()->unref(&rgw_fh);
}
};
@@ -1449,9 +1449,9 @@ public:
op = this;
}
- virtual bool only_bucket() override { return false; }
+ bool only_bucket() override { return false; }
- virtual int op_init() override {
+ int op_init() override {
// assign store, s, and dialect_handler
RGWObjectCtx* rados_ctx
= static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
@@ -1462,7 +1462,7 @@ public:
return 0;
}
- virtual int header_init() override {
+ int header_init() override {
struct req_state* s = get_state();
s->info.method = "GET";
s->op = OP_GET;
@@ -1484,12 +1484,12 @@ public:
return 0;
}
- virtual int get_params() override {
+ int get_params() override {
max = default_max;
return 0;
}
- virtual void send_response() override {
+ void send_response() override {
valid = true;
if ((objs.size() > 1) ||
(! objs.empty() &&
diff --git a/src/rgw/rgw_gc.cc b/src/rgw/rgw_gc.cc
index c46f693daea..8fb461292dc 100644
--- a/src/rgw/rgw_gc.cc
+++ b/src/rgw/rgw_gc.cc
@@ -253,7 +253,7 @@ int RGWGC::process()
bool RGWGC::going_down()
{
- return (down_flag.read() != 0);
+ return down_flag;
}
void RGWGC::start_processor()
@@ -264,7 +264,7 @@ void RGWGC::start_processor()
void RGWGC::stop_processor()
{
- down_flag.set(1);
+ down_flag = true;
if (worker) {
worker->stop();
worker->join();
diff --git a/src/rgw/rgw_gc.h b/src/rgw/rgw_gc.h
index ca48a6e75e5..491796b50e8 100644
--- a/src/rgw/rgw_gc.h
+++ b/src/rgw/rgw_gc.h
@@ -6,7 +6,6 @@
#include "include/types.h"
-#include "include/atomic.h"
#include "include/rados/librados.hpp"
#include "common/Mutex.h"
#include "common/Cond.h"
@@ -15,12 +14,14 @@
#include "rgw_rados.h"
#include "cls/rgw/cls_rgw_types.h"
+#include <atomic>
+
class RGWGC {
CephContext *cct;
RGWRados *store;
int max_objs;
string *obj_names;
- atomic_t down_flag;
+ std::atomic<bool> down_flag = { false };
int tag_index(const string& tag);
diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc
index 070110e5099..81c4c6ef6bc 100644
--- a/src/rgw/rgw_http_client.cc
+++ b/src/rgw/rgw_http_client.cc
@@ -16,6 +16,8 @@
#include "rgw_coroutine.h"
+#include <atomic>
+
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rgw
@@ -24,7 +26,7 @@ struct rgw_http_req_data : public RefCountedObject {
curl_slist *h;
uint64_t id;
int ret;
- atomic_t done;
+ std::atomic<bool> done = { false };
RGWHTTPClient *client;
void *user_info;
bool registered;
@@ -58,12 +60,12 @@ struct rgw_http_req_data : public RefCountedObject {
easy_handle = NULL;
h = NULL;
- done.set(1);
+ done = true;
cond.Signal();
}
bool is_done() {
- return done.read() != 0;
+ return done;
}
int get_retcode() {
@@ -900,14 +902,14 @@ int RGWHTTPManager::set_threaded()
void RGWHTTPManager::stop()
{
- if (is_stopped.read()) {
+ if (is_stopped) {
return;
}
- is_stopped.set(1);
+ is_stopped = true;
if (is_threaded) {
- going_down.set(1);
+ going_down = true;
signal_thread();
reqs_thread->join();
delete reqs_thread;
@@ -935,7 +937,7 @@ void *RGWHTTPManager::reqs_thread_entry()
ldout(cct, 20) << __func__ << ": start" << dendl;
- while (!going_down.read()) {
+ while (!going_down) {
int ret = do_curl_wait(cct, (CURLM *)multi_handle, thread_pipe[0]);
if (ret < 0) {
dout(0) << "ERROR: do_curl_wait() returned: " << ret << dendl;
diff --git a/src/rgw/rgw_http_client.h b/src/rgw/rgw_http_client.h
index 4266b7b50ba..cbe4f3d0312 100644
--- a/src/rgw/rgw_http_client.h
+++ b/src/rgw/rgw_http_client.h
@@ -6,10 +6,11 @@
#include "common/RWLock.h"
#include "common/Cond.h"
-#include "include/atomic.h"
#include "rgw_common.h"
#include "rgw_string.h"
+#include <atomic>
+
using param_pair_t = pair<string, string>;
using param_vec_t = vector<param_pair_t>;
@@ -33,7 +34,7 @@ class RGWHTTPClient
string last_url;
bool verify_ssl; // Do not validate self signed certificates, default to false
- atomic_t stopped;
+ std::atomic<unsigned> stopped { 0 };
protected:
CephContext *cct;
@@ -219,8 +220,8 @@ class RGWHTTPManager {
RGWCompletionManager *completion_mgr;
void *multi_handle;
bool is_threaded;
- atomic_t going_down;
- atomic_t is_stopped;
+ std::atomic<unsigned> going_down { 0 };
+ std::atomic<unsigned> is_stopped { 0 };
RWLock reqs_lock;
map<uint64_t, rgw_http_req_data *> reqs;
diff --git a/src/rgw/rgw_http_errors.h b/src/rgw/rgw_http_errors.h
index a0423bb48fd..e2aec310c88 100644
--- a/src/rgw/rgw_http_errors.h
+++ b/src/rgw/rgw_http_errors.h
@@ -6,144 +6,11 @@
#include "rgw_common.h"
-struct rgw_http_errors {
- int err_no;
- int http_ret;
- const char *s3_code;
-};
+typedef const std::map<int,const std::pair<int, const char*>> rgw_http_errors;
-const static struct rgw_http_errors RGW_HTTP_ERRORS[] = {
- { 0, 200, "" },
- { STATUS_CREATED, 201, "Created" },
- { STATUS_ACCEPTED, 202, "Accepted" },
- { STATUS_NO_CONTENT, 204, "NoContent" },
- { STATUS_PARTIAL_CONTENT, 206, "" },
- { ERR_PERMANENT_REDIRECT, 301, "PermanentRedirect" },
- { ERR_WEBSITE_REDIRECT, 301, "WebsiteRedirect" },
- { STATUS_REDIRECT, 303, "" },
- { ERR_NOT_MODIFIED, 304, "NotModified" },
- { EINVAL, 400, "InvalidArgument" },
- { ERR_INVALID_REQUEST, 400, "InvalidRequest" },
- { ERR_INVALID_DIGEST, 400, "InvalidDigest" },
- { ERR_BAD_DIGEST, 400, "BadDigest" },
- { ERR_INVALID_BUCKET_NAME, 400, "InvalidBucketName" },
- { ERR_INVALID_OBJECT_NAME, 400, "InvalidObjectName" },
- { ERR_UNRESOLVABLE_EMAIL, 400, "UnresolvableGrantByEmailAddress" },
- { ERR_INVALID_PART, 400, "InvalidPart" },
- { ERR_INVALID_PART_ORDER, 400, "InvalidPartOrder" },
- { ERR_REQUEST_TIMEOUT, 400, "RequestTimeout" },
- { ERR_TOO_LARGE, 400, "EntityTooLarge" },
- { ERR_TOO_SMALL, 400, "EntityTooSmall" },
- { ERR_TOO_MANY_BUCKETS, 400, "TooManyBuckets" },
- { ERR_MALFORMED_XML, 400, "MalformedXML" },
- { ERR_AMZ_CONTENT_SHA256_MISMATCH, 400, "XAmzContentSHA256Mismatch" },
- { ERR_MALFORMED_DOC, 400, "MalformedPolicyDocument" },
- { ERR_LENGTH_REQUIRED, 411, "MissingContentLength" },
- { EACCES, 403, "AccessDenied" },
- { EPERM, 403, "AccessDenied" },
- { ERR_SIGNATURE_NO_MATCH, 403, "SignatureDoesNotMatch" },
- { ERR_INVALID_ACCESS_KEY, 403, "InvalidAccessKeyId" },
- { ERR_USER_SUSPENDED, 403, "UserSuspended" },
- { ERR_REQUEST_TIME_SKEWED, 403, "RequestTimeTooSkewed" },
- { ERR_QUOTA_EXCEEDED, 403, "QuotaExceeded" },
- { ENOENT, 404, "NoSuchKey" },
- { ERR_NO_SUCH_BUCKET, 404, "NoSuchBucket" },
- { ERR_NO_SUCH_WEBSITE_CONFIGURATION, 404, "NoSuchWebsiteConfiguration" },
- { ERR_NO_SUCH_UPLOAD, 404, "NoSuchUpload" },
- { ERR_NOT_FOUND, 404, "Not Found"},
- { ERR_NO_SUCH_LC, 404, "NoSuchLifecycleConfiguration"},
- { ERR_NO_ROLE_FOUND, 404, "NoSuchEntity"},
- { ERR_METHOD_NOT_ALLOWED, 405, "MethodNotAllowed" },
- { ETIMEDOUT, 408, "RequestTimeout" },
- { EEXIST, 409, "BucketAlreadyExists" },
- { ERR_USER_EXIST, 409, "UserAlreadyExists" },
- { ERR_EMAIL_EXIST, 409, "EmailExists" },
- { ERR_KEY_EXIST, 409, "KeyExists"},
- { ERR_ROLE_EXISTS, 409, "EntityAlreadyExists"},
- { ERR_DELETE_CONFLICT, 409, "DeleteConflict"},
- { ERR_INVALID_SECRET_KEY, 400, "InvalidSecretKey"},
- { ERR_INVALID_KEY_TYPE, 400, "InvalidKeyType"},
- { ERR_INVALID_CAP, 400, "InvalidCapability"},
- { ERR_INVALID_TENANT_NAME, 400, "InvalidTenantName" },
- { ENOTEMPTY, 409, "BucketNotEmpty" },
- { ERR_PRECONDITION_FAILED, 412, "PreconditionFailed" },
- { ERANGE, 416, "InvalidRange" },
- { ERR_UNPROCESSABLE_ENTITY, 422, "UnprocessableEntity" },
- { ERR_LOCKED, 423, "Locked" },
- { ERR_INTERNAL_ERROR, 500, "InternalError" },
- { ERR_NOT_IMPLEMENTED, 501, "NotImplemented" },
- { ERR_SERVICE_UNAVAILABLE, 503, "ServiceUnavailable"}
-};
-
-const static struct rgw_http_errors RGW_HTTP_SWIFT_ERRORS[] = {
- { EACCES, 403, "AccessDenied" },
- { EPERM, 401, "AccessDenied" },
- { ERR_USER_SUSPENDED, 401, "UserSuspended" },
- { ERR_INVALID_UTF8, 412, "Invalid UTF8" },
- { ERR_BAD_URL, 412, "Bad URL" },
- { ERR_NOT_SLO_MANIFEST, 400, "Not an SLO manifest" },
- { ERR_QUOTA_EXCEEDED, 413, "QuotaExceeded" }
-};
-
-struct rgw_http_status_code {
- int code;
- const char *name;
-};
-
-const static struct rgw_http_status_code http_codes[] = {
- { 100, "Continue" },
- { 200, "OK" },
- { 201, "Created" },
- { 202, "Accepted" },
- { 204, "No Content" },
- { 205, "Reset Content" },
- { 206, "Partial Content" },
- { 207, "Multi Status" },
- { 208, "Already Reported" },
- { 300, "Multiple Choices" },
- { 301, "Moved Permanently" },
- { 302, "Found" },
- { 303, "See Other" },
- { 304, "Not Modified" },
- { 305, "User Proxy" },
- { 306, "Switch Proxy" },
- { 307, "Temporary Redirect" },
- { 308, "Permanent Redirect" },
- { 400, "Bad Request" },
- { 401, "Unauthorized" },
- { 402, "Payment Required" },
- { 403, "Forbidden" },
- { 404, "Not Found" },
- { 405, "Method Not Allowed" },
- { 406, "Not Acceptable" },
- { 407, "Proxy Authentication Required" },
- { 408, "Request Timeout" },
- { 409, "Conflict" },
- { 410, "Gone" },
- { 411, "Length Required" },
- { 412, "Precondition Failed" },
- { 413, "Request Entity Too Large" },
- { 414, "Request-URI Too Long" },
- { 415, "Unsupported Media Type" },
- { 416, "Requested Range Not Satisfiable" },
- { 417, "Expectation Failed" },
- { 422, "Unprocessable Entity" },
- { 500, "Internal Server Error" },
- { 501, "Not Implemented" },
- { 0, NULL },
-};
-
-#define ARRAY_LEN(arr) (sizeof(arr) / sizeof(arr[0]))
-
-static inline const struct rgw_http_errors *search_err(int err_no, const struct rgw_http_errors *errs, int len)
-{
- for (int i = 0; i < len; ++i, ++errs) {
- if (err_no == errs->err_no)
- return errs;
- }
- return NULL;
-}
+extern rgw_http_errors rgw_http_s3_errors;
+extern rgw_http_errors rgw_http_swift_errors;
static inline int rgw_http_error_to_errno(int http_err)
{
diff --git a/src/rgw/rgw_iam_policy.cc b/src/rgw/rgw_iam_policy.cc
new file mode 100644
index 00000000000..c12e243f1e0
--- /dev/null
+++ b/src/rgw/rgw_iam_policy.cc
@@ -0,0 +1,1514 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#include <cstring>
+#include <regex>
+#include <sstream>
+#include <stack>
+#include <utility>
+
+#include "rapidjson/reader.h"
+
+#include "rgw_auth.h"
+#include "rgw_iam_policy.h"
+
+namespace {
+constexpr int dout_subsys = ceph_subsys_rgw;
+}
+
+using std::bitset;
+using std::find;
+using std::int64_t;
+using std::move;
+using std::pair;
+using std::regex;
+using std::regex_match;
+using std::size_t;
+using std::smatch;
+using std::string;
+using std::stringstream;
+using std::ostream;
+using std::uint16_t;
+using std::uint64_t;
+using std::unordered_map;
+
+using boost::container::flat_set;
+using boost::none;
+using boost::optional;
+
+using rapidjson::BaseReaderHandler;
+using rapidjson::UTF8;
+using rapidjson::SizeType;
+using rapidjson::Reader;
+using rapidjson::kParseCommentsFlag;
+using rapidjson::kParseNumbersAsStringsFlag;
+using rapidjson::StringStream;
+using rapidjson::ParseResult;
+
+using rgw::auth::Principal;
+
+namespace rgw {
+namespace IAM {
+#include "rgw_iam_policy_keywords.frag.cc"
+
+// One row of the action-name table: pairs the textual name of an action
+// with the constant associated with it.
+struct actpair {
+ const char* name; // action name as text, e.g. "s3:CreateBucket"
+ const uint64_t bit; // constant for that action -- presumably a bit index or mask; TODO confirm
+};
+
+namespace {
+// Map the partition field of an ARN (a regex sub-match) to a Partition.
+//
+// p:         the text matched for the partition component.
+// wildcards: when true, "*" is accepted and yields Partition::wildcard.
+//
+// Returns none for any text that is not a recognised partition (including
+// "*" when wildcards is false).
+optional<Partition> to_partition(const smatch::value_type& p,
+                                 bool wildcards) {
+  if (p == "aws") {
+    return Partition::aws;
+  } else if (p == "aws-cn") {
+    return Partition::aws_cn;
+  } else if (p == "aws-us-gov") {
+    return Partition::aws_us_gov;
+  } else if (p == "*" && wildcards) {
+    return Partition::wildcard;
+  }
+
+  // Every accepted spelling has returned above. No abort is needed after
+  // this point: all paths through the function end in a return.
+  return none;
+}
+
+// Map the service field of an ARN (a regex sub-match) to a Service.
+//
+// s:         the text matched for the service component.
+// wildcards: when true, "*" is accepted and yields Service::wildcard.
+//
+// Returns none when the text is not a key of the lookup table below.
+optional<Service> to_service(const smatch::value_type& s,
+                             bool wildcards) {
+  // Service name as written in an ARN -> enumerator.
+  static const unordered_map<string, Service> services = {
+    { "acm", Service::acm },
+    { "apigateway", Service::apigateway },
+    { "appstream", Service::appstream },
+    { "artifact", Service::artifact },
+    { "autoscaling", Service::autoscaling },
+    { "aws-marketplace", Service::aws_marketplace },
+    { "aws-marketplace-management",
+      Service::aws_marketplace_management },
+    { "aws-portal", Service::aws_portal },
+    { "cloudformation", Service::cloudformation },
+    { "cloudfront", Service::cloudfront },
+    { "cloudhsm", Service::cloudhsm },
+    { "cloudsearch", Service::cloudsearch },
+    { "cloudtrail", Service::cloudtrail },
+    { "cloudwatch", Service::cloudwatch },
+    { "codebuild", Service::codebuild },
+    { "codecommit", Service::codecommit },
+    { "codedeploy", Service::codedeploy },
+    { "codepipeline", Service::codepipeline },
+    { "cognito-identity", Service::cognito_identity },
+    { "cognito-idp", Service::cognito_idp },
+    { "cognito-sync", Service::cognito_sync },
+    { "config", Service::config },
+    { "datapipeline", Service::datapipeline },
+    { "devicefarm", Service::devicefarm },
+    { "directconnect", Service::directconnect },
+    { "dms", Service::dms },
+    { "ds", Service::ds },
+    { "dynamodb", Service::dynamodb },
+    { "ec2", Service::ec2 },
+    { "ecr", Service::ecr },
+    { "ecs", Service::ecs },
+    { "elasticache", Service::elasticache },
+    { "elasticbeanstalk", Service::elasticbeanstalk },
+    { "elasticfilesystem", Service::elasticfilesystem },
+    { "elasticloadbalancing", Service::elasticloadbalancing },
+    { "elasticmapreduce", Service::elasticmapreduce },
+    { "elastictranscoder", Service::elastictranscoder },
+    { "es", Service::es },
+    { "events", Service::events },
+    { "firehose", Service::firehose },
+    { "gamelift", Service::gamelift },
+    { "glacier", Service::glacier },
+    { "health", Service::health },
+    { "iam", Service::iam },
+    { "importexport", Service::importexport },
+    { "inspector", Service::inspector },
+    { "iot", Service::iot },
+    { "kinesis", Service::kinesis },
+    { "kinesisanalytics", Service::kinesisanalytics },
+    { "kms", Service::kms },
+    { "lambda", Service::lambda },
+    { "lightsail", Service::lightsail },
+    { "logs", Service::logs },
+    { "machinelearning", Service::machinelearning },
+    { "mobileanalytics", Service::mobileanalytics },
+    { "mobilehub", Service::mobilehub },
+    { "opsworks", Service::opsworks },
+    { "opsworks-cm", Service::opsworks_cm },
+    { "polly", Service::polly },
+    { "rds", Service::rds },
+    { "redshift", Service::redshift },
+    { "route53", Service::route53 },
+    { "route53domains", Service::route53domains },
+    { "s3", Service::s3 },
+    { "sdb", Service::sdb },
+    { "servicecatalog", Service::servicecatalog },
+    { "ses", Service::ses },
+    { "sns", Service::sns },
+    { "sqs", Service::sqs },
+    { "ssm", Service::ssm },
+    { "states", Service::states },
+    { "storagegateway", Service::storagegateway },
+    { "sts", Service::sts },
+    { "support", Service::support },
+    { "swf", Service::swf },
+    { "trustedadvisor", Service::trustedadvisor },
+    { "waf", Service::waf },
+    { "workmail", Service::workmail },
+    { "workspaces", Service::workspaces }};
+
+  // The wildcard is handled before the table lookup; "*" is not a key.
+  if (wildcards && s == "*") {
+    return Service::wildcard;
+  }
+
+  const auto found = services.find(s);
+  if (found != services.end()) {
+    return found->second;
+  }
+  return none;
+}
+}
+
+// Build the S3 ARN for an object: partition aws, service s3, empty region,
+// the bucket's tenant as the account, and "<bucket name>/<key name>" as the
+// resource.
+ARN::ARN(const rgw_obj& o)
+  : partition(Partition::aws),
+    service(Service::s3),
+    region(),
+    account(o.bucket.tenant),
+    resource(o.bucket.name)
+{
+  // resource currently holds the bucket name; add the separator and key.
+  resource += '/';
+  resource += o.key.name;
+}
+
+// Build the S3 ARN for a bucket: partition aws, service s3, empty region,
+// the bucket's tenant as the account, and the bucket name as the resource.
+ARN::ARN(const rgw_bucket& b)
+ : partition(Partition::aws),
+ service(Service::s3),
+ region(),
+ account(b.tenant),
+ resource(b.name) { }
+
+// Build the S3 ARN for the object named `o` inside bucket `b`: partition
+// aws, service s3, empty region, the bucket's tenant as the account, and
+// "<bucket name>/<o>" as the resource.
+ARN::ARN(const rgw_bucket& b, const string& o)
+  : partition(Partition::aws),
+    service(Service::s3),
+    region(),
+    account(b.tenant),
+    resource(b.name)
+{
+  // resource currently holds the bucket name; add the separator and object.
+  resource += '/';
+  resource += o;
+}
+
+// Parse the textual form
+//   arn:<partition>:<service>:<region>:<account>:<resource>
+//
+// s:         the string to parse.
+// wildcards: when true, the bare string "*" is accepted (every field of the
+//            result is a wildcard) and '*' may appear inside fields; when
+//            false, a string with '*' in any field does not match.
+//
+// Returns none when the string does not match the pattern, or when its
+// partition or service is not recognised.
+optional<ARN> ARN::parse(const string& s, bool wildcards) {
+  static const regex rx_wild("arn:([^:]*):([^:]*):([^:]*):([^:]*):([^:]*)",
+                             std::regex_constants::ECMAScript |
+                             std::regex_constants::optimize);
+  static const regex rx_no_wild(
+    "arn:([^:*]*):([^:*]*):([^:*]*):([^:*]*):([^:*]*)",
+    std::regex_constants::ECMAScript |
+    std::regex_constants::optimize);
+
+  if (wildcards && (s == "*")) {
+    return ARN(Partition::wildcard, Service::wildcard, "*", "*", "*");
+  }
+
+  smatch fields;
+  const regex& rx = wildcards ? rx_wild : rx_no_wild;
+  if (!regex_match(s, fields, rx)) {
+    return none;
+  }
+  // Whole match plus the five capture groups.
+  ceph_assert(fields.size() == 6);
+
+  const auto part = to_partition(fields[1], wildcards);
+  if (!part) {
+    return none;
+  }
+
+  const auto svc = to_service(fields[2], wildcards);
+  if (!svc) {
+    return none;
+  }
+
+  ARN parsed;
+  parsed.partition = *part;
+  parsed.service = *svc;
+  parsed.region = fields[3];
+  parsed.account = fields[4];
+  parsed.resource = fields[5];
+  return parsed;
+}
+
+// Render this ARN as text:
+//   arn:<partition>:<service>:<region>:<account>:<resource>
+//
+// The string starts with the literal "arn:" so that it has the same shape
+// ARN::parse() requires of its input. A partition other than aws, aws-cn or
+// aws-us-gov is written as "*", as is a service that is not in the table
+// below. Region, account and resource are emitted verbatim.
+string ARN::to_string() const {
+  string s("arn:");
+
+  if (partition == Partition::aws) {
+    s.append("aws:");
+  } else if (partition == Partition::aws_cn) {
+    s.append("aws-cn:");
+  } else if (partition == Partition::aws_us_gov) {
+    s.append("aws-us-gov:");
+  } else {
+    s.append("*:");
+  }
+
+  // Enumerator -> service name as written in an ARN.
+  static const unordered_map<Service, string> services = {
+    { Service::acm, "acm" },
+    { Service::apigateway, "apigateway" },
+    { Service::appstream, "appstream" },
+    { Service::artifact, "artifact" },
+    { Service::autoscaling, "autoscaling" },
+    { Service::aws_marketplace, "aws-marketplace" },
+    { Service::aws_marketplace_management, "aws-marketplace-management" },
+    { Service::aws_portal, "aws-portal" },
+    { Service::cloudformation, "cloudformation" },
+    { Service::cloudfront, "cloudfront" },
+    { Service::cloudhsm, "cloudhsm" },
+    { Service::cloudsearch, "cloudsearch" },
+    { Service::cloudtrail, "cloudtrail" },
+    { Service::cloudwatch, "cloudwatch" },
+    { Service::codebuild, "codebuild" },
+    { Service::codecommit, "codecommit" },
+    { Service::codedeploy, "codedeploy" },
+    { Service::codepipeline, "codepipeline" },
+    { Service::cognito_identity, "cognito-identity" },
+    { Service::cognito_idp, "cognito-idp" },
+    { Service::cognito_sync, "cognito-sync" },
+    { Service::config, "config" },
+    { Service::datapipeline, "datapipeline" },
+    { Service::devicefarm, "devicefarm" },
+    { Service::directconnect, "directconnect" },
+    { Service::dms, "dms" },
+    { Service::ds, "ds" },
+    { Service::dynamodb, "dynamodb" },
+    { Service::ec2, "ec2" },
+    { Service::ecr, "ecr" },
+    { Service::ecs, "ecs" },
+    { Service::elasticache, "elasticache" },
+    { Service::elasticbeanstalk, "elasticbeanstalk" },
+    { Service::elasticfilesystem, "elasticfilesystem" },
+    { Service::elasticloadbalancing, "elasticloadbalancing" },
+    { Service::elasticmapreduce, "elasticmapreduce" },
+    { Service::elastictranscoder, "elastictranscoder" },
+    { Service::es, "es" },
+    { Service::events, "events" },
+    { Service::firehose, "firehose" },
+    { Service::gamelift, "gamelift" },
+    { Service::glacier, "glacier" },
+    { Service::health, "health" },
+    { Service::iam, "iam" },
+    { Service::importexport, "importexport" },
+    { Service::inspector, "inspector" },
+    { Service::iot, "iot" },
+    { Service::kinesis, "kinesis" },
+    { Service::kinesisanalytics, "kinesisanalytics" },
+    { Service::kms, "kms" },
+    { Service::lambda, "lambda" },
+    { Service::lightsail, "lightsail" },
+    { Service::logs, "logs" },
+    { Service::machinelearning, "machinelearning" },
+    { Service::mobileanalytics, "mobileanalytics" },
+    { Service::mobilehub, "mobilehub" },
+    { Service::opsworks, "opsworks" },
+    { Service::opsworks_cm, "opsworks-cm" },
+    { Service::polly, "polly" },
+    { Service::rds, "rds" },
+    { Service::redshift, "redshift" },
+    { Service::route53, "route53" },
+    { Service::route53domains, "route53domains" },
+    { Service::s3, "s3" },
+    { Service::sdb, "sdb" },
+    { Service::servicecatalog, "servicecatalog" },
+    { Service::ses, "ses" },
+    { Service::sns, "sns" },
+    { Service::sqs, "sqs" },
+    { Service::ssm, "ssm" },
+    { Service::states, "states" },
+    { Service::storagegateway, "storagegateway" },
+    { Service::sts, "sts" },
+    { Service::support, "support" },
+    { Service::swf, "swf" },
+    { Service::trustedadvisor, "trustedadvisor" },
+    { Service::waf, "waf" },
+    { Service::workmail, "workmail" },
+    { Service::workspaces, "workspaces" }};
+
+  auto i = services.find(service);
+  if (i != services.end()) {
+    s.append(i->second);
+  } else {
+    // Not in the table (e.g. the wildcard service).
+    s.push_back('*');
+  }
+  s.push_back(':');
+
+  s.append(region);
+  s.push_back(':');
+
+  s.append(account);
+  s.push_back(':');
+
+  s.append(resource);
+
+  return s;
+}
+
+// Two ARNs are equal when all five fields (partition, service, region,
+// account, resource) compare equal.
+bool operator ==(const ARN& l, const ARN& r) {
+  if (l.partition != r.partition) {
+    return false;
+  }
+  if (l.service != r.service) {
+    return false;
+  }
+  if (l.region != r.region) {
+    return false;
+  }
+  if (l.account != r.account) {
+    return false;
+  }
+  return l.resource == r.resource;
+}
+// Order ARNs lexicographically by (partition, service, region, account,
+// resource).
+//
+// Each field is only consulted when all earlier fields are equal. Merely
+// OR-ing the per-field "<" results is not a strict weak ordering: with
+// l = (aws, s3) and r = (aws_cn, acm), say, both l < r and r < l could be
+// true, which ordered containers and sorting algorithms do not tolerate.
+bool operator <(const ARN& l, const ARN& r) {
+  if (l.partition != r.partition) {
+    return l.partition < r.partition;
+  }
+  if (l.service != r.service) {
+    return l.service < r.service;
+  }
+  if (l.region != r.region) {
+    return l.region < r.region;
+  }
+  if (l.account != r.account) {
+    return l.account < r.account;
+  }
+  return l.resource < r.resource;
+}
+
+// Test whether this ARN, used as a pattern, covers `candidate`.
+//
+// The candidate is not allowed to have wildcards: a candidate whose
+// partition or service is the wildcard never matches. The only way to
+// support wildcards on both sides sanely would be to use unification
+// rather than matching.
+bool ARN::match(const ARN& candidate) const {
+  // Partition: candidate must be concrete; the pattern must be equal to it
+  // or be the wildcard.
+  const bool partition_ok =
+    (candidate.partition != Partition::wildcard) &&
+    (partition == candidate.partition || partition == Partition::wildcard);
+  if (!partition_ok) {
+    return false;
+  }
+
+  // Service: same rule as for the partition.
+  const bool service_ok =
+    (candidate.service != Service::wildcard) &&
+    (service == candidate.service || service == Service::wildcard);
+  if (!service_ok) {
+    return false;
+  }
+
+  // The string fields are compared, in this order, by the global ::match()
+  // with the MATCH_POLICY_ARN flag; evaluation stops at the first failure.
+  return ::match(region, candidate.region, MATCH_POLICY_ARN) &&
+    ::match(account, candidate.account, MATCH_POLICY_ARN) &&
+    ::match(resource, candidate.resource, MATCH_POLICY_ARN);
+}
+
+// Table pairing each S3 action name, as it is spelled in a policy's
+// Action / NotAction element, with the action bit it stands for.
+// ParseState::do_string matches the policy string against every name
+// in this table (MATCH_POLICY_ACTION) and ORs in the bit of each entry
+// that matches, so one policy string may select several entries.
+static const actpair actpairs[] =
+{{ "s3:AbortMultipartUpload", s3AbortMultipartUpload },
+ { "s3:CreateBucket", s3CreateBucket },
+ { "s3:DeleteBucketPolicy", s3DeleteBucketPolicy },
+ { "s3:DeleteBucket", s3DeleteBucket },
+ { "s3:DeleteBucketWebsite", s3DeleteBucketWebsite },
+ { "s3:DeleteObject", s3DeleteObject },
+ { "s3:DeleteObjectVersion", s3DeleteObjectVersion },
+ { "s3:DeleteReplicationConfiguration", s3DeleteReplicationConfiguration },
+ { "s3:GetAccelerateConfiguration", s3GetAccelerateConfiguration },
+ { "s3:GetBucketAcl", s3GetBucketAcl },
+ { "s3:GetBucketCORS", s3GetBucketCORS },
+ { "s3:GetBucketLocation", s3GetBucketLocation },
+ { "s3:GetBucketLogging", s3GetBucketLogging },
+ { "s3:GetBucketNotification", s3GetBucketNotification },
+ { "s3:GetBucketPolicy", s3GetBucketPolicy },
+ { "s3:GetBucketRequestPayment", s3GetBucketRequestPayment },
+ { "s3:GetBucketTagging", s3GetBucketTagging },
+ { "s3:GetBucketVersioning", s3GetBucketVersioning },
+ { "s3:GetBucketWebsite", s3GetBucketWebsite },
+ { "s3:GetLifecycleConfiguration", s3GetLifecycleConfiguration },
+ { "s3:GetObjectAcl", s3GetObjectAcl },
+ { "s3:GetObject", s3GetObject },
+ { "s3:GetObjectTorrent", s3GetObjectTorrent },
+ { "s3:GetObjectVersionAcl", s3GetObjectVersionAcl },
+ { "s3:GetObjectVersion", s3GetObjectVersion },
+ { "s3:GetObjectVersionTorrent", s3GetObjectVersionTorrent },
+ { "s3:GetReplicationConfiguration", s3GetReplicationConfiguration },
+ { "s3:ListAllMyBuckets", s3ListAllMyBuckets },
+ { "s3:ListBucketMultiPartUploads", s3ListBucketMultiPartUploads },
+ { "s3:ListBucket", s3ListBucket },
+ { "s3:ListBucketVersions", s3ListBucketVersions },
+ { "s3:ListMultipartUploadParts", s3ListMultipartUploadParts },
+ { "s3:PutAccelerateConfiguration", s3PutAccelerateConfiguration },
+ { "s3:PutBucketAcl", s3PutBucketAcl },
+ { "s3:PutBucketCORS", s3PutBucketCORS },
+ { "s3:PutBucketLogging", s3PutBucketLogging },
+ { "s3:PutBucketNotification", s3PutBucketNotification },
+ { "s3:PutBucketPolicy", s3PutBucketPolicy },
+ { "s3:PutBucketRequestPayment", s3PutBucketRequestPayment },
+ { "s3:PutBucketTagging", s3PutBucketTagging },
+ { "s3:PutBucketVersioning", s3PutBucketVersioning },
+ { "s3:PutBucketWebsite", s3PutBucketWebsite },
+ { "s3:PutLifecycleConfiguration", s3PutLifecycleConfiguration },
+ { "s3:PutObjectAcl", s3PutObjectAcl },
+ { "s3:PutObject", s3PutObject },
+ { "s3:PutObjectVersionAcl", s3PutObjectVersionAcl },
+ { "s3:PutReplicationConfiguration", s3PutReplicationConfiguration },
+ { "s3:RestoreObject", s3RestoreObject }};
+
+// Forward declaration: ParseState keeps a pointer back to the parser
+// whose stack it lives on.
+struct PolicyParser;
+
+// Pseudo-keyword for the outermost stack frame; PolicyParser::StartObject
+// pushes it when the parse stack is still empty.
+const Keyword top[1]{"<Top>", TokenKind::pseudo, TokenID::Top, 0, false,
+ false};
+// Pseudo-keyword for a condition key. ParseState::key pushes it when a
+// key that is not a known token appears inside a condition operator.
+// NOTE(review): the two trailing booleans presumably are the
+// arrayable/objectable flags read by array_start()/obj_start() --
+// confirm against the Keyword definition.
+const Keyword cond_key[1]{"<Condition Key>", TokenKind::cond_key,
+ TokenID::CondKey, 0, true, false};
+
+// One frame of the policy parser's stack: the keyword currently being
+// filled in (w), a pointer back to the owning parser (pp), and whether
+// an array and/or an object is currently open for that keyword.
+struct ParseState {
+  PolicyParser* pp;
+  const Keyword* w;
+
+  bool arraying = false;
+  bool objecting = false;
+
+  ParseState(PolicyParser* pp, const Keyword* w)
+    : pp(pp), w(w) {}
+
+  // Clears the parser's per-statement "seen" flags.
+  void reset();
+
+  bool obj_start();
+  bool obj_end();
+
+  // Open an array for this keyword. Fails when the keyword does not
+  // allow arrays or an array is already open.
+  bool array_start() {
+    if (!w->arrayable || arraying) {
+      return false;
+    }
+    arraying = true;
+    return true;
+  }
+  bool array_end();
+
+  bool key(const char* s, size_t l);
+  bool do_string(CephContext* cct, const char* s, size_t l);
+  bool number(const char* str, size_t l);
+};
+
+// Event handler handed to the JSON Reader while parsing a policy. It
+// keeps a stack of ParseState frames and forwards every event to the
+// frame on top. The base class takes the derived type as a template
+// argument (the Curiously Recurring Template Pattern).
+struct PolicyParser : public BaseReaderHandler<UTF8<>, PolicyParser> {
+  keyword_hash tokens;
+  std::vector<ParseState> s;
+  CephContext* cct;
+  const string& tenant;
+  Policy& policy;
+
+  // Bit set of keywords already encountered; see dex() for the bits.
+  uint32_t seen = 0;
+
+  // Map a keyword to its bit in `seen`. Aborts for any token that has
+  // no bit assigned.
+  uint32_t dex(TokenID in) const {
+    switch (in) {
+    case TokenID::Version:       return 0x1;
+    case TokenID::Id:            return 0x2;
+    case TokenID::Statement:     return 0x4;
+    case TokenID::Sid:           return 0x8;
+    case TokenID::Effect:        return 0x10;
+    case TokenID::Principal:     return 0x20;
+    case TokenID::NotPrincipal:  return 0x40;
+    case TokenID::Action:        return 0x80;
+    case TokenID::NotAction:     return 0x100;
+    case TokenID::Resource:      return 0x200;
+    case TokenID::NotResource:   return 0x400;
+    case TokenID::Condition:     return 0x800;
+    case TokenID::AWS:           return 0x1000;
+    case TokenID::Federated:     return 0x2000;
+    case TokenID::Service:       return 0x4000;
+    case TokenID::CanonicalUser: return 0x8000;
+    default:
+      ceph_abort();
+    }
+  }
+
+  // Has this keyword been seen already?
+  bool test(TokenID in) {
+    return (seen & dex(in)) != 0;
+  }
+  // Mark one keyword, or a list of keywords, as seen.
+  void set(TokenID in) {
+    seen |= dex(in);
+  }
+  void set(std::initializer_list<TokenID> l) {
+    for (auto id : l) {
+      set(id);
+    }
+  }
+  // Forget one keyword, or a list of keywords.
+  void reset(TokenID in) {
+    seen &= ~dex(in);
+  }
+  void reset(std::initializer_list<TokenID> l) {
+    for (auto id : l) {
+      reset(id);
+    }
+  }
+
+  PolicyParser(CephContext* cct, const string& tenant, Policy& policy)
+    : cct(cct), tenant(tenant), policy(policy) {}
+  PolicyParser(const PolicyParser& policy) = delete;
+
+  // The very first object opens the top-level frame; every later
+  // object is handed to the frame on top of the stack.
+  bool StartObject() {
+    if (!s.empty()) {
+      return s.back().obj_start();
+    }
+
+    s.emplace_back(this, top);
+    s.back().objecting = true;
+    return true;
+  }
+
+  // All remaining events fail on an empty stack and are otherwise
+  // delegated to the top frame.
+  bool EndObject(SizeType memberCount) {
+    return !s.empty() && s.back().obj_end();
+  }
+  bool Key(const char* str, SizeType length, bool copy) {
+    return !s.empty() && s.back().key(str, length);
+  }
+
+  bool String(const char* str, SizeType length, bool copy) {
+    return !s.empty() && s.back().do_string(cct, str, length);
+  }
+  bool RawNumber(const char* str, SizeType length, bool copy) {
+    return !s.empty() && s.back().number(str, length);
+  }
+  bool StartArray() {
+    return !s.empty() && s.back().array_start();
+  }
+  bool EndArray(SizeType) {
+    return !s.empty() && s.back().array_end();
+  }
+
+  // Any event type not handled above makes the parse fail.
+  bool Default() {
+    return false;
+  }
+};
+
+
+// The remaining ParseState members are defined out of line, after
+// PolicyParser, because they need its complete definition.
+//
+// Close the object opened for this frame. Returns false when no object
+// is open. When the frame is also inside an array it stays on the
+// stack and the parser's per-statement flags are cleared; otherwise
+// the frame is popped (which destroys *this, so nothing may touch
+// members afterwards).
+bool ParseState::obj_end() {
+  if (!objecting) {
+    return false;
+  }
+
+  objecting = false;
+  if (arraying) {
+    reset();
+  } else {
+    pp->s.pop_back();
+  }
+  return true;
+}
+
+// Handle an object key while this frame is on top of the stack.
+//
+// s/l is the key text. A key that is not a known token is only legal
+// inside a condition operator, where it names the condition key: a new
+// Condition is appended to the current statement and a cond_key frame
+// is pushed to collect its values. A known token is pushed as a new
+// frame when it is allowed inside the current keyword (and, except for
+// condition operators, has not been seen before). Returns false for
+// anything else, which fails the parse.
+//
+// *this is an element of pp->s, so pushing onto pp->s may reallocate
+// the vector and invalidate `this`. Every path below therefore makes
+// the push its final action and reads no member afterwards.
+bool ParseState::key(const char* s, size_t l) {
+  auto k = pp->tokens.lookup(s, l);
+
+  if (!k) {
+    if (w->kind != TokenKind::cond_op) {
+      return false;
+    }
+    // Record the condition first, while w is still safe to read, and
+    // only then grow the stack.
+    auto& t = pp->policy.statements.back();
+    t.conditions.emplace_back(w->id, s, l);
+    pp->s.emplace_back(pp, cond_key);
+    return true;
+  }
+
+  // Does the token belong inside the keyword on top of the stack?
+  const bool nests =
+    // Top
+    ((w->id == TokenID::Top) && (k->kind == TokenKind::top)) ||
+    // Statement
+    ((w->id == TokenID::Statement) && (k->kind == TokenKind::statement)) ||
+    // Principal
+    ((w->id == TokenID::Principal || w->id == TokenID::NotPrincipal) &&
+     (k->kind == TokenKind::princ_type));
+
+  // Such tokens may appear only once in their scope.
+  if (nests && !pp->test(k->id)) {
+    pp->set(k->id);
+    pp->s.emplace_back(pp, k);
+    return true;
+  }
+
+  // Condition operators may repeat, so they are not tracked in `seen`.
+  if ((w->id == TokenID::Condition) &&
+      (k->kind == TokenKind::cond_op)) {
+    pp->s.emplace_back(pp, k);
+    return true;
+  }
+  return false;
+}
+
+// Turn one principal string from a policy into a Principal.
+//
+// t is the principal type keyword the string appeared under and s the
+// string itself. Recognized forms under "AWS" are:
+//   "*"                         -> wildcard
+//   a bare name (no ':' or '/') -> tenant of that name
+//   an ARN with resource "root" -> tenant named by the ARN's account
+//   an ARN "user/<name>"        -> user <name> in the ARN's account
+//   an ARN "role/<name>"        -> role <name> in the ARN's account
+// Everything else (including every CanonicalUser principal, which is
+// not handled yet) is logged and discarded by returning boost::none.
+static optional<Principal> parse_principal(CephContext* cct, TokenID t,
+                                           string&& s) {
+  // Wildcard!
+  if ((t == TokenID::AWS) && (s == "*")) {
+    return Principal::wildcard();
+
+    // Do nothing for now.
+  } else if (t == TokenID::CanonicalUser) {
+
+    // AWS ARNs
+  } else if (t == TokenID::AWS) {
+    auto a = ARN::parse(s);
+    if (!a) {
+      // Not an ARN. Accept it as a tenant name only when it cannot be
+      // a malformed ARN or path; otherwise fall through to the discard
+      // below instead of dereferencing the empty optional.
+      if (std::none_of(s.begin(), s.end(),
+                       [](const char& c) {
+                         return (c == ':') || (c == '/');
+                       })) {
+        // Since tenants are simply prefixes, there's no really good
+        // way to see if one exists or not. So we return the thing and
+        // let them try to match against it.
+        return Principal::tenant(std::move(s));
+      }
+    } else if (a->resource == "root") {
+      return Principal::tenant(std::move(a->account));
+    } else {
+      static const regex rx("([^/]*)/(.*)",
+                            std::regex_constants::ECMAScript |
+                            std::regex_constants::optimize);
+      smatch match;
+      if (regex_match(a->resource, match, rx)) {
+        // Whole match plus the two capture groups.
+        ceph_assert(match.size() == 3);
+
+        if (match[1] == "user") {
+          return Principal::user(std::move(a->account),
+                                 match[2]);
+        }
+
+        if (match[1] == "role") {
+          return Principal::role(std::move(a->account),
+                                 match[2]);
+        }
+      }
+    }
+  }
+
+  // s has not been moved from on any path that reaches this point.
+  ldout(cct, 0) << "Supplied principal is discarded: " << s << dendl;
+  return boost::none;
+}
+
+// Handle a string value for the keyword of this frame.
+//
+// s/l is the string. Depending on the keyword it sets the policy's
+// version or id, or fills in the current statement's sid, effect,
+// principals, actions, resources or condition values. Returns false
+// (failing the parse) when the string is not acceptable for the
+// keyword. Unless an array is open, the frame is popped afterwards,
+// which destroys *this.
+bool ParseState::do_string(CephContext* cct, const char* s, size_t l) {
+  auto k = pp->tokens.lookup(s, l);
+  Policy& p = pp->policy;
+  Statement* t = p.statements.empty() ? nullptr : &(p.statements.back());
+
+  // Top level!
+  if ((w->id == TokenID::Version) && k &&
+      k->kind == TokenKind::version_key) {
+    p.version = static_cast<Version>(k->specific);
+  } else if (w->id == TokenID::Id) {
+    p.id = string(s, l);
+
+    // Statement
+
+  } else if (w->id == TokenID::Sid) {
+    t->sid.emplace(s, l);
+  } else if ((w->id == TokenID::Effect) && k &&
+             k->kind == TokenKind::effect_key) {
+    // k is null when the value is not a known token at all; such a
+    // value must fail the parse (final else below), not be dereferenced.
+    t->effect = static_cast<Effect>(k->specific);
+  } else if (w->id == TokenID::Principal && s && *s == '*') {
+    t->princ.emplace(Principal::wildcard());
+  } else if (w->id == TokenID::NotPrincipal && s && *s == '*') {
+    t->noprinc.emplace(Principal::wildcard());
+  } else if ((w->id == TokenID::Action) ||
+             (w->id == TokenID::NotAction)) {
+    // The string may match several action names; collect every
+    // matching bit.
+    for (auto& ap : actpairs) {
+      if (match({s, l}, ap.name, MATCH_POLICY_ACTION)) {
+        (w->id == TokenID::Action ? t->action : t->notaction) |= ap.bit;
+      }
+    }
+  } else if (w->id == TokenID::Resource || w->id == TokenID::NotResource) {
+    auto a = ARN::parse({s, l}, true);
+    // You can't specify resources for someone ELSE'S account.
+    if (a && (a->account.empty() || a->account == pp->tenant ||
+              a->account == "*")) {
+      if (a->account.empty() || a->account == "*") {
+        a->account = pp->tenant;
+      }
+      (w->id == TokenID::Resource ? t->resource : t->notresource)
+        .emplace(std::move(*a));
+    } else {
+      ldout(cct, 0) << "Supplied resource is discarded: " << string(s, l)
+                    << dendl;
+    }
+  } else if (w->kind == TokenKind::cond_key) {
+    auto& stmt = pp->policy.statements.back();
+    stmt.conditions.back().vals.emplace_back(s, l);
+
+    // Principals
+
+  } else if (w->kind == TokenKind::princ_type) {
+    ceph_assert(pp->s.size() > 1);
+    // The frame below this one says whether we are inside Principal
+    // or NotPrincipal.
+    auto& pri = pp->s[pp->s.size() - 2].w->id == TokenID::Principal ?
+      t->princ : t->noprinc;
+
+    auto o = parse_principal(pp->cct, w->id, string(s, l));
+    if (o) {
+      pri.emplace(std::move(*o));
+    }
+
+    // Failure
+
+  } else {
+    return false;
+  }
+
+  if (!arraying) {
+    pp->s.pop_back();
+  }
+
+  return true;
+}
+
+// Handle a numeric value, delivered as its raw text s/l. Numbers are
+// only accepted as condition values: the text is appended to the
+// values of the statement's most recent condition. Any other keyword
+// fails the parse. Unless an array is open, the frame is popped
+// afterwards, which destroys *this.
+bool ParseState::number(const char* s, size_t l) {
+  if (w->kind != TokenKind::cond_key) {
+    return false;
+  }
+
+  pp->policy.statements.back().conditions.back().vals.emplace_back(s, l);
+
+  if (!arraying) {
+    pp->s.pop_back();
+  }
+
+  return true;
+}
+
+// Clear the parser's "seen" flag for every statement-level keyword so
+// that the next statement in the array may use each of them again.
+void ParseState::reset() {
+  for (auto id : {TokenID::Sid, TokenID::Effect, TokenID::Principal,
+                  TokenID::NotPrincipal, TokenID::Action,
+                  TokenID::NotAction, TokenID::Resource,
+                  TokenID::NotResource, TokenID::Condition}) {
+    pp->reset(id);
+  }
+}
+
+// Open an object for this frame's keyword. Fails when the keyword does
+// not allow objects or one is already open. Opening an object under
+// Statement begins a new, empty statement in the policy.
+bool ParseState::obj_start() {
+  if (!w->objectable || objecting) {
+    return false;
+  }
+
+  objecting = true;
+  if (w->id == TokenID::Statement) {
+    pp->policy.statements.emplace_back();
+  }
+  return true;
+}
+
+
+// Close the array of this frame and pop the frame (destroying *this).
+// Fails when no array is open or an object inside it is still open.
+bool ParseState::array_end() {
+  if (!arraying || objecting) {
+    return false;
+  }
+
+  pp->s.pop_back();
+  return true;
+}
+
+// Print a masked address as "<address>/<prefix>".
+//
+// The address bits are read from the highest bit downwards: IPv4 as
+// four decimal octets joined by '.', IPv6 as eight hexadecimal 16-bit
+// groups joined by ':' (no zero compression). Every group is streamed
+// as an unsigned int -- streaming a uint8_t would emit a raw character
+// instead of a number -- and the prefix is always decimal. The
+// stream's formatting flags are restored before returning, so the hex
+// mode used for IPv6 does not leak to the caller.
+ostream& operator <<(ostream& m, const MaskedIP& ip) {
+  const auto saved_flags = m.flags();
+
+  if (ip.v6) {
+    for (int i = 7; i >= 0; --i) {
+      unsigned int hextet = 0;
+      for (int j = 15; j >= 0; --j) {
+        hextet |= (static_cast<unsigned int>(ip.addr[(i * 16) + j]) << j);
+      }
+      m << std::hex << hextet;
+      if (i != 0) {
+        m << ":";
+      }
+    }
+  } else {
+    for (int i = 3; i >= 0; --i) {
+      unsigned int octet = 0;
+      for (int j = 7; j >= 0; --j) {
+        octet |= (static_cast<unsigned int>(ip.addr[(i * 8) + j]) << j);
+      }
+      m << std::dec << octet;
+      if (i != 0) {
+        m << ".";
+      }
+    }
+  }
+  m << "/" << std::dec << static_cast<unsigned int>(ip.prefix);
+
+  m.flags(saved_flags);
+  return m;
+}
+
+// Render a MaskedIP to a string through its stream inserter.
+string to_string(const MaskedIP& m) {
+  stringstream out;
+  out << m;
+  return out.str();
+}
+
+// Evaluate this condition against the request environment.
+//
+// The Null operator is true exactly when the key is absent from env.
+// For every other operator a missing key yields `ifexists`: an
+// "...IfExists" condition is satisfied by the absence of its key, a
+// plain one is not. Otherwise the environment's value for the key is
+// compared with the condition's values by the operator's comparison.
+// Operators without an implementation (the ARN operators) evaluate to
+// false.
+bool Condition::eval(const Environment& env) const {
+  auto i = env.find(key);
+  if (op == TokenID::Null) {
+    return i == env.end();
+  }
+
+  if (i == env.end()) {
+    return ifexists;
+  }
+  const auto& s = i->second;
+
+  switch (op) {
+    // String!
+  case TokenID::StringEquals:
+    return orrible(std::equal_to<std::string>(), s, vals);
+
+  case TokenID::StringNotEquals:
+    return orrible(std::not2(std::equal_to<std::string>()),
+                   s, vals);
+
+  case TokenID::StringEqualsIgnoreCase:
+    return orrible(ci_equal_to(), s, vals);
+
+  case TokenID::StringNotEqualsIgnoreCase:
+    return orrible(std::not2(ci_equal_to()), s, vals);
+
+    // Implement actual StringLike with wildcarding later; for now the
+    // Like operators behave as exact (in)equality.
+  case TokenID::StringLike:
+    return orrible(std::equal_to<std::string>(), s, vals);
+  case TokenID::StringNotLike:
+    return orrible(std::not2(std::equal_to<std::string>()),
+                   s, vals);
+
+    // Numeric
+  case TokenID::NumericEquals:
+    return shortible(std::equal_to<double>(), as_number, s, vals);
+
+  case TokenID::NumericNotEquals:
+    return shortible(std::not2(std::equal_to<double>()),
+                     as_number, s, vals);
+
+  case TokenID::NumericLessThan:
+    return shortible(std::less<double>(), as_number, s, vals);
+
+  case TokenID::NumericLessThanEquals:
+    return shortible(std::less_equal<double>(), as_number, s, vals);
+
+  case TokenID::NumericGreaterThan:
+    return shortible(std::greater<double>(), as_number, s, vals);
+
+  case TokenID::NumericGreaterThanEquals:
+    return shortible(std::greater_equal<double>(), as_number, s, vals);
+
+    // Date!
+  case TokenID::DateEquals:
+    return shortible(std::equal_to<ceph::real_time>(), as_date, s, vals);
+
+  case TokenID::DateNotEquals:
+    return shortible(std::not2(std::equal_to<ceph::real_time>()),
+                     as_date, s, vals);
+
+  case TokenID::DateLessThan:
+    return shortible(std::less<ceph::real_time>(), as_date, s, vals);
+
+  case TokenID::DateLessThanEquals:
+    return shortible(std::less_equal<ceph::real_time>(), as_date, s, vals);
+
+  case TokenID::DateGreaterThan:
+    return shortible(std::greater<ceph::real_time>(), as_date, s, vals);
+
+  case TokenID::DateGreaterThanEquals:
+    return shortible(std::greater_equal<ceph::real_time>(), as_date, s,
+                     vals);
+
+    // Bool!
+  case TokenID::Bool:
+    return shortible(std::equal_to<bool>(), as_bool, s, vals);
+
+    // Binary!
+  case TokenID::BinaryEquals:
+    return shortible(std::equal_to<ceph::bufferlist>(), as_binary, s,
+                     vals);
+
+    // IP Address!
+  case TokenID::IpAddress:
+    return shortible(std::equal_to<MaskedIP>(), as_network, s, vals);
+
+  case TokenID::NotIpAddress:
+    return shortible(std::not2(std::equal_to<MaskedIP>()), as_network, s,
+                     vals);
+
+#if 0
+    // Amazon Resource Names! (Does S3 need this?)
+    TokenID::ArnEquals, TokenID::ArnNotEquals, TokenID::ArnLike,
+      TokenID::ArnNotLike,
+#endif
+
+  default:
+    return false;
+  }
+}
+
+// Parse "address" or "address/prefix" (IPv4 or IPv6) into a MaskedIP.
+//
+// Returns none for an empty string, a missing, malformed or
+// out-of-range prefix (more than 32 bits for IPv4, 128 for IPv6), or
+// an address inet_pton rejects. Without an explicit prefix the full
+// address length is used. The address is stored with its first
+// (most significant) byte in the highest bits for both families, which
+// is the order the MaskedIP stream inserter reads it back in.
+optional<MaskedIP> Condition::as_network(const string& s) {
+  MaskedIP m;
+  if (s.empty()) {
+    return none;
+  }
+
+  // Only IPv6 addresses contain a colon. find() returns npos (not 0)
+  // when there is none, so compare explicitly.
+  m.v6 = (s.find(':') != string::npos);
+  const auto slash = s.find('/');
+  if (slash == string::npos) {
+    m.prefix = m.v6 ? 128 : 32;
+  } else {
+    const char* start = s.c_str() + slash + 1;
+    char* end = nullptr;
+    const unsigned long prefix = strtoul(start, &end, 10);
+    // Reject an empty prefix, trailing junk, and out-of-range values;
+    // the range is checked before the value is stored in m.prefix.
+    if (end == start || *end != 0 || prefix > (m.v6 ? 128UL : 32UL)) {
+      return none;
+    }
+    m.prefix = prefix;
+  }
+
+  // inet_pton needs the address without the "/prefix" part.
+  string t;
+  auto p = &s;
+
+  if (slash != string::npos) {
+    t.assign(s, 0, slash);
+    p = &t;
+  }
+
+  if (m.v6) {
+    // inet_pton fills in a bare in6_addr, not a sockaddr_in6.
+    struct in6_addr a;
+    if (inet_pton(AF_INET6, p->c_str(), static_cast<void*>(&a)) != 1) {
+      return none;
+    }
+
+    // s6_addr is in network order: byte 0 is the most significant.
+    for (int i = 0; i < 16; ++i) {
+      m.addr |= Address(a.s6_addr[i]) << (8 * (15 - i));
+    }
+  } else {
+    // Likewise a bare in_addr for IPv4.
+    struct in_addr a;
+    if (inet_pton(AF_INET, p->c_str(), static_cast<void*>(&a)) != 1) {
+      return none;
+    }
+    m.addr = ntohl(a.s_addr);
+  }
+
+  return m;
+}
+
+namespace {
+// Name of a condition operator as it is written in a policy document,
+// or "InvalidConditionOperator" for any token that is not one.
+const char* condop_string(const TokenID t) {
+  switch (t) {
+    // String!
+  case TokenID::StringEquals:
+    return "StringEquals";
+
+  case TokenID::StringNotEquals:
+    return "StringNotEquals";
+
+  case TokenID::StringEqualsIgnoreCase:
+    return "StringEqualsIgnoreCase";
+
+  case TokenID::StringNotEqualsIgnoreCase:
+    return "StringNotEqualsIgnoreCase";
+
+  case TokenID::StringLike:
+    return "StringLike";
+
+  case TokenID::StringNotLike:
+    return "StringNotLike";
+
+    // Numeric!
+  case TokenID::NumericEquals:
+    return "NumericEquals";
+
+  case TokenID::NumericNotEquals:
+    return "NumericNotEquals";
+
+  case TokenID::NumericLessThan:
+    return "NumericLessThan";
+
+  case TokenID::NumericLessThanEquals:
+    return "NumericLessThanEquals";
+
+  case TokenID::NumericGreaterThan:
+    return "NumericGreaterThan";
+
+  case TokenID::NumericGreaterThanEquals:
+    return "NumericGreaterThanEquals";
+
+    // Date!
+  case TokenID::DateEquals:
+    return "DateEquals";
+
+  case TokenID::DateNotEquals:
+    return "DateNotEquals";
+
+  case TokenID::DateLessThan:
+    return "DateLessThan";
+
+  case TokenID::DateLessThanEquals:
+    return "DateLessThanEquals";
+
+  case TokenID::DateGreaterThan:
+    return "DateGreaterThan";
+
+  case TokenID::DateGreaterThanEquals:
+    return "DateGreaterThanEquals";
+
+    // Bool!
+  case TokenID::Bool:
+    return "Bool";
+
+    // Binary!
+  case TokenID::BinaryEquals:
+    return "BinaryEquals";
+
+    // IP Address!
+  case TokenID::IpAddress:
+    return "IpAddress";
+
+  case TokenID::NotIpAddress:
+    return "NotIpAddress";
+
+    // Amazon Resource Names!
+  case TokenID::ArnEquals:
+    return "ArnEquals";
+
+  case TokenID::ArnNotEquals:
+    return "ArnNotEquals";
+
+  case TokenID::ArnLike:
+    return "ArnLike";
+
+  case TokenID::ArnNotLike:
+    return "ArnNotLike";
+
+  case TokenID::Null:
+    return "Null";
+
+  default:
+    return "InvalidConditionOperator";
+  }
+}
+
+// Print the range [begin, end) as "[ a, b, c ]", or "[]" when it is
+// empty. Each element is written with its own stream inserter. The
+// iterators must support `end - 1`.
+template<typename Iterator>
+ostream& print_array(ostream& m, Iterator begin, Iterator end) {
+  if (begin == end) {
+    m << "[";
+    return m << "]";
+  }
+
+  const auto last = end - 1;
+  m << "[ ";
+  for (auto it = begin; it != end; ++it) {
+    m << *it;
+    // A separator after every element except the last, which gets the
+    // space before the closing bracket instead.
+    m << (it == last ? " " : ", ");
+  }
+  return m << "]";
+}
+}
+
+// Print a condition as "{ <Operator>[IfExists]: { <key>: [ values ] } }".
+// The IfExists suffix belongs to the operator name, the key is
+// separated from its values, and both opening braces are closed.
+ostream& operator <<(ostream& m, const Condition& c) {
+  m << "{ " << condop_string(c.op);
+  if (c.ifexists) {
+    m << "IfExists";
+  }
+  m << ": { " << c.key << ": ";
+  print_array(m, c.vals.cbegin(), c.vals.cend());
+  return m << " } }";
+}
+
+// Render a Condition to a string through its stream inserter.
+string to_string(const Condition& c) {
+  stringstream out;
+  out << c;
+  return out.str();
+}
+
+// Decide what this statement says about a request.
+//
+// e is the request environment used by the conditions, ida the
+// requesting identity (if any), act the single action bit requested
+// and res the resource it targets. Returns the statement's effect when
+// the statement applies to the request and all of its conditions hold,
+// and Effect::Pass otherwise.
+//
+// Resource/NotResource and Action/NotAction are alternatives: a
+// statement that only lists NotResource (or only NotAction) applies to
+// everything except what it lists. An empty positive list therefore
+// only rules the statement out when the negative list is empty too.
+Effect Statement::eval(const Environment& e,
+                       optional<const rgw::auth::Identity&> ida,
+                       uint64_t act, const ARN& res) const {
+  if (ida && (!ida->is_identity(princ) || ida->is_identity(noprinc))) {
+    return Effect::Pass;
+  }
+
+  const auto matches_res = [&res](const ARN& pattern) {
+    return pattern.match(res);
+  };
+
+  // A statement naming no resources at all applies to nothing.
+  if (resource.empty() && notresource.empty()) {
+    return Effect::Pass;
+  }
+  if (!resource.empty() &&
+      !std::any_of(resource.begin(), resource.end(), matches_res)) {
+    return Effect::Pass;
+  }
+  if (std::any_of(notresource.begin(), notresource.end(), matches_res)) {
+    return Effect::Pass;
+  }
+
+  // Same scheme for the action bits.
+  if (!action && !notaction) {
+    return Effect::Pass;
+  }
+  if (action && !(action & act)) {
+    return Effect::Pass;
+  }
+  if (notaction & act) {
+    return Effect::Pass;
+  }
+
+  if (std::all_of(conditions.begin(),
+                  conditions.end(),
+                  [&e](const Condition& c) { return c.eval(e);})) {
+    return effect;
+  }
+
+  return Effect::Pass;
+}
+
+namespace {
+// Name of a single S3 action bit as written in a policy ("s3:..."),
+// or "s3Invalid" when the value is not exactly one known action bit.
+const char* action_bit_string(uint64_t action) {
+  struct bit_name {
+    uint64_t bit;
+    const char* name;
+  };
+  static const bit_name names[] = {
+    { s3GetObject, "s3:GetObject" },
+    { s3GetObjectVersion, "s3:GetObjectVersion" },
+    { s3PutObject, "s3:PutObject" },
+    { s3GetObjectAcl, "s3:GetObjectAcl" },
+    { s3GetObjectVersionAcl, "s3:GetObjectVersionAcl" },
+    { s3PutObjectAcl, "s3:PutObjectAcl" },
+    { s3PutObjectVersionAcl, "s3:PutObjectVersionAcl" },
+    { s3DeleteObject, "s3:DeleteObject" },
+    { s3DeleteObjectVersion, "s3:DeleteObjectVersion" },
+    { s3ListMultipartUploadParts, "s3:ListMultipartUploadParts" },
+    { s3AbortMultipartUpload, "s3:AbortMultipartUpload" },
+    { s3GetObjectTorrent, "s3:GetObjectTorrent" },
+    { s3GetObjectVersionTorrent, "s3:GetObjectVersionTorrent" },
+    { s3RestoreObject, "s3:RestoreObject" },
+    { s3CreateBucket, "s3:CreateBucket" },
+    { s3DeleteBucket, "s3:DeleteBucket" },
+    { s3ListBucket, "s3:ListBucket" },
+    { s3ListBucketVersions, "s3:ListBucketVersions" },
+    { s3ListAllMyBuckets, "s3:ListAllMyBuckets" },
+    { s3ListBucketMultiPartUploads, "s3:ListBucketMultiPartUploads" },
+    { s3GetAccelerateConfiguration, "s3:GetAccelerateConfiguration" },
+    { s3PutAccelerateConfiguration, "s3:PutAccelerateConfiguration" },
+    { s3GetBucketAcl, "s3:GetBucketAcl" },
+    { s3PutBucketAcl, "s3:PutBucketAcl" },
+    { s3GetBucketCORS, "s3:GetBucketCORS" },
+    { s3PutBucketCORS, "s3:PutBucketCORS" },
+    { s3GetBucketVersioning, "s3:GetBucketVersioning" },
+    { s3PutBucketVersioning, "s3:PutBucketVersioning" },
+    { s3GetBucketRequestPayment, "s3:GetBucketRequestPayment" },
+    { s3PutBucketRequestPayment, "s3:PutBucketRequestPayment" },
+    { s3GetBucketLocation, "s3:GetBucketLocation" },
+    { s3GetBucketPolicy, "s3:GetBucketPolicy" },
+    { s3DeleteBucketPolicy, "s3:DeleteBucketPolicy" },
+    { s3PutBucketPolicy, "s3:PutBucketPolicy" },
+    { s3GetBucketNotification, "s3:GetBucketNotification" },
+    { s3PutBucketNotification, "s3:PutBucketNotification" },
+    { s3GetBucketLogging, "s3:GetBucketLogging" },
+    { s3PutBucketLogging, "s3:PutBucketLogging" },
+    { s3GetBucketTagging, "s3:GetBucketTagging" },
+    { s3PutBucketTagging, "s3:PutBucketTagging" },
+    { s3GetBucketWebsite, "s3:GetBucketWebsite" },
+    { s3PutBucketWebsite, "s3:PutBucketWebsite" },
+    { s3DeleteBucketWebsite, "s3:DeleteBucketWebsite" },
+    { s3GetLifecycleConfiguration, "s3:GetLifecycleConfiguration" },
+    { s3PutLifecycleConfiguration, "s3:PutLifecycleConfiguration" },
+    { s3PutReplicationConfiguration, "s3:PutReplicationConfiguration" },
+    { s3GetReplicationConfiguration, "s3:GetReplicationConfiguration" },
+    { s3DeleteReplicationConfiguration, "s3:DeleteReplicationConfiguration" },
+  };
+
+  for (const auto& entry : names) {
+    if (entry.bit == action) {
+      return entry.name;
+    }
+  }
+  return "s3Invalid";
+}
+
+// Print the names of all action bits set in `a` as "[ x, y ]", or
+// "[ ]" when none is set.
+//
+// The probe bit is built as a 64-bit value: the mask is 64 bits wide,
+// and shifting a plain int `1` is undefined once the shift count
+// reaches the width of int.
+ostream& print_actions(ostream& m, const uint64_t a) {
+  bool begun = false;
+  m << "[ ";
+  for (auto i = 0U; i < s3Count; ++i) {
+    const uint64_t bit = static_cast<uint64_t>(1) << i;
+    if (a & bit) {
+      if (begun) {
+        m << ", ";
+      } else {
+        begun = true;
+      }
+      m << action_bit_string(bit);
+    }
+  }
+  if (begun) {
+    m << " ]";
+  } else {
+    m << "]";
+  }
+  return m;
+}
+}
+
+// Print a statement as "{ field, field, ... }".
+//
+// Sid, Principal and NotPrincipal (when present) come first, each
+// followed by a separator because Effect is always printed after them.
+// Each of the optional fields after Effect is printed with a leading
+// separator, so no separator is ever left dangling.
+ostream& operator <<(ostream& m, const Statement& s) {
+  m << "{ ";
+  if (s.sid) {
+    m << "Sid: " << *s.sid << ", ";
+  }
+  if (!s.princ.empty()) {
+    m << "Principal: ";
+    print_array(m, s.princ.cbegin(), s.princ.cend());
+    m << ", ";
+  }
+  if (!s.noprinc.empty()) {
+    m << "NotPrincipal: ";
+    print_array(m, s.noprinc.cbegin(), s.noprinc.cend());
+    m << ", ";
+  }
+
+  const char* effect_name = (s.effect == Effect::Allow) ? "Allow" : "Deny";
+  m << "Effect: " << effect_name;
+
+  if (s.action) {
+    m << ", ";
+    m << "Action: ";
+    print_actions(m, s.action);
+  }
+
+  if (s.notaction) {
+    m << ", ";
+    m << "NotAction: ";
+    print_actions(m, s.notaction);
+  }
+
+  if (!s.resource.empty()) {
+    m << ", ";
+    m << "Resource: ";
+    print_array(m, s.resource.cbegin(), s.resource.cend());
+  }
+
+  if (!s.notresource.empty()) {
+    m << ", ";
+    m << "NotResource: ";
+    print_array(m, s.notresource.cbegin(), s.notresource.cend());
+  }
+
+  if (!s.conditions.empty()) {
+    m << ", ";
+    m << "Condition: ";
+    print_array(m, s.conditions.cbegin(), s.conditions.cend());
+  }
+
+  return m << " }";
+}
+
+// Render a Statement to a string through its stream inserter.
+string to_string(const Statement& s) {
+  stringstream out;
+  out << s;
+  return out.str();
+}
+
+// Build a policy by parsing its JSON text.
+//
+// The text is copied out of _text and kept in `text`; a PolicyParser
+// bound to this object fills in the policy while the Reader walks the
+// document (numbers delivered as strings, comments permitted). Throws
+// PolicyParseException carrying the parse result when parsing fails.
+Policy::Policy(CephContext* cct, const string& tenant,
+               const bufferlist& _text)
+  : text(_text.to_str()) {
+  PolicyParser handler(cct, tenant, *this);
+  StringStream input(text.data());
+  Reader reader;
+  auto result = reader.Parse<kParseNumbersAsStringsFlag |
+                             kParseCommentsFlag>(input, handler);
+  if (!result) {
+    throw PolicyParseException(std::move(result));
+  }
+}
+
+// Evaluate every statement of the policy for a request.
+//
+// The first statement that yields Deny decides the result at once.
+// Otherwise the result is Allow when at least one statement allowed
+// the request, and Pass when none had anything to say.
+Effect Policy::eval(const Environment& e,
+                    optional<const rgw::auth::Identity&> ida,
+                    std::uint64_t action, const ARN& resource) const {
+  bool any_allow = false;
+  for (auto& stmt : statements) {
+    const auto verdict = stmt.eval(e, ida, action, resource);
+    if (verdict == Effect::Deny) {
+      return verdict;
+    }
+    if (verdict == Effect::Allow) {
+      any_allow = true;
+    }
+  }
+
+  if (any_allow) {
+    return Effect::Allow;
+  }
+  return Effect::Pass;
+}
+
+// Print a policy as "{ Version: ..., Id: ..., Statements: [ ... ] }".
+// Id and Statements are omitted when absent or empty. Separators are
+// only written between fields, so the last field is never followed by
+// a dangling ", ".
+ostream& operator <<(ostream& m, const Policy& p) {
+  m << "{ Version: "
+    << (p.version == Version::v2008_10_17 ? "2008-10-17" : "2012-10-17");
+
+  if (p.id) {
+    m << ", " << "Id: " << *p.id;
+  }
+
+  if (!p.statements.empty()) {
+    m << ", " << "Statements: ";
+    print_array(m, p.statements.cbegin(), p.statements.cend());
+  }
+  return m << " }";
+}
+
+// Render a Policy to a string through its stream inserter.
+string to_string(const Policy& p) {
+  stringstream out;
+  out << p;
+  return out.str();
+}
+
+}
+}
diff --git a/src/rgw/rgw_iam_policy.h b/src/rgw/rgw_iam_policy.h
new file mode 100644
index 00000000000..4429a574309
--- /dev/null
+++ b/src/rgw/rgw_iam_policy.h
@@ -0,0 +1,466 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_IAM_POLICY_H
+#define CEPH_RGW_IAM_POLICY_H
+
+#include <bitset>
+#include <chrono>
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
+#include <boost/optional.hpp>
+#include <boost/thread/shared_mutex.hpp>
+#include <boost/utility/string_ref.hpp>
+#include <boost/variant.hpp>
+
+#include "common/ceph_time.h"
+#include "common/iso_8601.h"
+
+#include "rapidjson/error/error.h"
+#include "rapidjson/error/en.h"
+
+#include "fnmatch.h"
+
+#include "rgw_basic_types.h"
+#include "rgw_iam_policy_keywords.h"
+
+#include "include/assert.h" // razzin' frazzin' ...grrr.
+
+class RGWRados;
+namespace rgw {
+namespace auth {
+class Identity;
+}
+}
+struct rgw_obj;
+struct rgw_bucket;
+
+namespace rgw {
+namespace IAM {
+// One bit per S3 action, so that a set of actions fits in a single
+// std::uint64_t mask.  s3None is the empty mask, s3Count is the number
+// of action bits defined below, and s3All is the mask with every one of
+// those bits set.
+static constexpr std::uint64_t s3None = 0;
+static constexpr std::uint64_t s3GetObject = 1ULL << 0;
+static constexpr std::uint64_t s3GetObjectVersion = 1ULL << 1;
+static constexpr std::uint64_t s3PutObject = 1ULL << 2;
+static constexpr std::uint64_t s3GetObjectAcl = 1ULL << 3;
+static constexpr std::uint64_t s3GetObjectVersionAcl = 1ULL << 4;
+static constexpr std::uint64_t s3PutObjectAcl = 1ULL << 5;
+static constexpr std::uint64_t s3PutObjectVersionAcl = 1ULL << 6;
+static constexpr std::uint64_t s3DeleteObject = 1ULL << 7;
+static constexpr std::uint64_t s3DeleteObjectVersion = 1ULL << 8;
+static constexpr std::uint64_t s3ListMultipartUploadParts = 1ULL << 9;
+static constexpr std::uint64_t s3AbortMultipartUpload = 1ULL << 10;
+static constexpr std::uint64_t s3GetObjectTorrent = 1ULL << 11;
+static constexpr std::uint64_t s3GetObjectVersionTorrent = 1ULL << 12;
+static constexpr std::uint64_t s3RestoreObject = 1ULL << 13;
+static constexpr std::uint64_t s3CreateBucket = 1ULL << 14;
+static constexpr std::uint64_t s3DeleteBucket = 1ULL << 15;
+static constexpr std::uint64_t s3ListBucket = 1ULL << 16;
+static constexpr std::uint64_t s3ListBucketVersions = 1ULL << 17;
+static constexpr std::uint64_t s3ListAllMyBuckets = 1ULL << 18;
+static constexpr std::uint64_t s3ListBucketMultiPartUploads = 1ULL << 19;
+static constexpr std::uint64_t s3GetAccelerateConfiguration = 1ULL << 20;
+static constexpr std::uint64_t s3PutAccelerateConfiguration = 1ULL << 21;
+static constexpr std::uint64_t s3GetBucketAcl = 1ULL << 22;
+static constexpr std::uint64_t s3PutBucketAcl = 1ULL << 23;
+static constexpr std::uint64_t s3GetBucketCORS = 1ULL << 24;
+static constexpr std::uint64_t s3PutBucketCORS = 1ULL << 25;
+static constexpr std::uint64_t s3GetBucketVersioning = 1ULL << 26;
+static constexpr std::uint64_t s3PutBucketVersioning = 1ULL << 27;
+static constexpr std::uint64_t s3GetBucketRequestPayment = 1ULL << 28;
+static constexpr std::uint64_t s3PutBucketRequestPayment = 1ULL << 29;
+static constexpr std::uint64_t s3GetBucketLocation = 1ULL << 30;
+static constexpr std::uint64_t s3GetBucketPolicy = 1ULL << 31;
+static constexpr std::uint64_t s3DeleteBucketPolicy = 1ULL << 32;
+static constexpr std::uint64_t s3PutBucketPolicy = 1ULL << 33;
+static constexpr std::uint64_t s3GetBucketNotification = 1ULL << 34;
+static constexpr std::uint64_t s3PutBucketNotification = 1ULL << 35;
+static constexpr std::uint64_t s3GetBucketLogging = 1ULL << 36;
+static constexpr std::uint64_t s3PutBucketLogging = 1ULL << 37;
+static constexpr std::uint64_t s3GetBucketTagging = 1ULL << 38;
+static constexpr std::uint64_t s3PutBucketTagging = 1ULL << 39;
+static constexpr std::uint64_t s3GetBucketWebsite = 1ULL << 40;
+static constexpr std::uint64_t s3PutBucketWebsite = 1ULL << 41;
+static constexpr std::uint64_t s3DeleteBucketWebsite = 1ULL << 42;
+static constexpr std::uint64_t s3GetLifecycleConfiguration = 1ULL << 43;
+static constexpr std::uint64_t s3PutLifecycleConfiguration = 1ULL << 44;
+static constexpr std::uint64_t s3PutReplicationConfiguration = 1ULL << 45;
+static constexpr std::uint64_t s3GetReplicationConfiguration = 1ULL << 46;
+static constexpr std::uint64_t s3DeleteReplicationConfiguration = 1ULL << 47;
+static constexpr std::uint64_t s3Count = 48;
+static constexpr std::uint64_t s3All = (1ULL << s3Count) - 1;
+
+// Maps a single S3 action flag to the RGW_PERM_* value associated with
+// it.  `op` must be exactly one of the s3* action constants (or s3All,
+// which maps to RGW_PERM_FULL_CONTROL); any other value, including a
+// combination of several flags, yields RGW_PERM_INVALID.
+//
+// This is a plain `inline` function rather than a member of an anonymous
+// namespace: an anonymous namespace in a header gives every including
+// translation unit its own private copy of the function.
+inline int op_to_perm(std::uint64_t op) {
+  switch (op) {
+  case s3GetObject:
+  case s3GetObjectTorrent:
+  case s3GetObjectVersion:
+  case s3GetObjectVersionTorrent:
+  case s3ListAllMyBuckets:
+  case s3ListBucket:
+  case s3ListBucketMultiPartUploads:
+  case s3ListBucketVersions:
+  case s3ListMultipartUploadParts:
+    return RGW_PERM_READ;
+
+  case s3AbortMultipartUpload:
+  case s3CreateBucket:
+  case s3DeleteBucket:
+  case s3DeleteObject:
+  case s3DeleteObjectVersion:
+  case s3PutObject:
+  case s3RestoreObject:
+    return RGW_PERM_WRITE;
+
+  case s3GetAccelerateConfiguration:
+  case s3GetBucketAcl:
+  case s3GetBucketCORS:
+  case s3GetBucketLocation:
+  case s3GetBucketLogging:
+  case s3GetBucketNotification:
+  case s3GetBucketPolicy:
+  case s3GetBucketRequestPayment:
+  case s3GetBucketTagging:
+  case s3GetBucketVersioning:
+  case s3GetBucketWebsite:
+  case s3GetLifecycleConfiguration:
+  case s3GetObjectAcl:
+  case s3GetObjectVersionAcl:
+  case s3GetReplicationConfiguration:
+    return RGW_PERM_READ_ACP;
+
+  case s3DeleteBucketPolicy:
+  case s3DeleteBucketWebsite:
+  case s3DeleteReplicationConfiguration:
+  case s3PutAccelerateConfiguration:
+  case s3PutBucketAcl:
+  case s3PutBucketCORS:
+  case s3PutBucketLogging:
+  case s3PutBucketNotification:
+  case s3PutBucketPolicy:
+  case s3PutBucketRequestPayment:
+  case s3PutBucketTagging:
+  case s3PutBucketVersioning:
+  case s3PutBucketWebsite:
+  case s3PutLifecycleConfiguration:
+  case s3PutObjectAcl:
+  case s3PutObjectVersionAcl:
+  case s3PutReplicationConfiguration:
+    return RGW_PERM_WRITE_ACP;
+
+  case s3All:
+    return RGW_PERM_FULL_CONTROL;
+  }
+  return RGW_PERM_INVALID;
+}
+
+using Environment = boost::container::flat_map<std::string, std::string>;
+
+// Partition component of an ARN.  `wildcard` is the value a
+// default-constructed ARN uses.
+enum struct Partition {
+ aws, aws_cn, aws_us_gov, wildcard
+ // If we wanted our own ARNs for principal type unique to us
+ // (maybe to integrate better with Swift) or for anything else we
+ // provide that doesn't map onto S3, we could add an 'rgw'
+ // partition type.
+};
+
+// Service component of an ARN.  `wildcard` is the value a
+// default-constructed ARN uses.  A std::hash specialization for this
+// enum is provided at the end of this header.
+enum struct Service {
+ apigateway, appstream, artifact, autoscaling, aws_portal, acm,
+ cloudformation, cloudfront, cloudhsm, cloudsearch, cloudtrail,
+ cloudwatch, events, logs, codebuild, codecommit, codedeploy,
+ codepipeline, cognito_idp, cognito_identity, cognito_sync,
+ config, datapipeline, dms, devicefarm, directconnect,
+ ds, dynamodb, ec2, ecr, ecs, ssm, elasticbeanstalk, elasticfilesystem,
+ elasticloadbalancing, elasticmapreduce, elastictranscoder, elasticache,
+ es, gamelift, glacier, health, iam, importexport, inspector, iot,
+ kms, kinesisanalytics, firehose, kinesis, lambda, lightsail,
+ machinelearning, aws_marketplace, aws_marketplace_management,
+ mobileanalytics, mobilehub, opsworks, opsworks_cm, polly,
+ redshift, rds, route53, route53domains, sts, servicecatalog,
+ ses, sns, sqs, s3, swf, sdb, states, storagegateway, support,
+ trustedadvisor, waf, workmail, workspaces, wildcard
+};
+
+// An Amazon Resource Name split into its partition, service, region,
+// account and resource components.
+struct ARN {
+ Partition partition;
+ Service service;
+ std::string region;
+ // Once we reify tenant, we should probably use that instead of a
+ // string.
+ std::string account;
+ std::string resource;
+
+ // Default ARN: wildcard partition and service, empty strings elsewhere.
+ ARN()
+ : partition(Partition::wildcard), service(Service::wildcard) {}
+ // Component-wise constructor; the string arguments are moved into place.
+ ARN(Partition partition, Service service, std::string region,
+ std::string account, std::string resource)
+ : partition(partition), service(service), region(std::move(region)),
+ account(std::move(account)), resource(std::move(resource)) {}
+ // Conversions from RGW objects and buckets; defined out of line.
+ ARN(const rgw_obj& o);
+ ARN(const rgw_bucket& b);
+ ARN(const rgw_bucket& b, const std::string& o);
+
+ // Parses `s` into an ARN, returning none on failure.
+ // NOTE(review): `wildcard` presumably controls whether wildcard
+ // components are accepted -- confirm against the definition.
+ static boost::optional<ARN> parse(const std::string& s,
+ bool wildcard = false);
+ std::string to_string() const;
+
+ // `this` is the pattern
+ bool match(const ARN& candidate) const;
+};
+
+// Free-function spelling of ARN::to_string().
+inline std::string to_string(const ARN& a) {
+  std::string rendered = a.to_string();
+  return rendered;
+}
+
+// Streams the textual form of an ARN (as produced by to_string) to `m`.
+inline std::ostream& operator <<(std::ostream& m, const ARN& a) {
+  m << to_string(a);
+  return m;
+}
+
+bool operator ==(const ARN& l, const ARN& r);
+bool operator <(const ARN& l, const ARN& r);
+
+// 128-bit value used to hold an IP address.
+using Address = std::bitset<128>;
+// An IP address together with a prefix length.
+struct MaskedIP {
+ // Selects the address width used when comparing: 128 bits when true,
+ // 32 bits otherwise.
+ bool v6;
+ Address addr;
+ // Since we're mapping IPv6 to IPv4 addresses, we may want to
+ // consider making the prefix always be in terms of a v6 address
+ // and just use the v6 bit to rewrite it as a v4 prefix for
+ // output.
+ unsigned int prefix;
+};
+
+// Text output for MaskedIP; both are defined out of line.  `std::string`
+// is spelled out so this header does not depend on a `using namespace
+// std` leaking in from another header.
+std::ostream& operator <<(std::ostream& m, const MaskedIP& ip);
+std::string to_string(const MaskedIP& m);
+
+// Two masked addresses compare equal when their addresses agree once
+// the low-order host bits have been shifted away.  The number of bits
+// dropped is the larger of the two host-part widths (address width minus
+// prefix length), i.e. the comparison uses the shorter prefix.
+inline bool operator ==(const MaskedIP& l, const MaskedIP& r) {
+  // `prefix` is unsigned, so do the subtraction in signed ints: an
+  // over-long prefix then shows up as a negative value instead of
+  // silently wrapping around to a huge shift.
+  const int lhost = (l.v6 ? 128 : 32) - static_cast<int>(l.prefix);
+  const int rhost = (r.v6 ? 128 : 32) - static_cast<int>(r.prefix);
+  const int shift = std::max(lhost, rhost);
+  // A full-length prefix (/32 or /128) on both sides legitimately gives
+  // shift == 0, so only a negative shift is an error.
+  ceph_assert(shift >= 0);
+  return (l.addr >> shift) == (r.addr >> shift);
+}
+
+// One condition of a policy statement: a condition operator (`op`), the
+// key it applies to (`key`), whether the operator carried the "IfExists"
+// suffix (`ifexists`), and the values to compare against (`vals`).  The
+// static helpers convert string values to typed ones and apply a
+// predicate across the value list.
+struct Condition {
+  TokenID op;
+  // Originally I was going to use a perfect hash table, but Marcus
+  // says keys are to be added at run-time not compile time.
+
+  // In future development, use symbol internment.
+  std::string key;
+  bool ifexists = false;
+  // Much to my annoyance there is no actual way to do this in a
+  // typed way that is compatible with AWS. I know this because I've
+  // seen examples where the same value is used as a string in one
+  // context and a date in another.
+  std::vector<std::string> vals;
+
+  Condition() = default;
+  // Builds a condition from the `len` bytes at `s`.  If the first
+  // occurrence of "IfExists" in that text ends exactly at its end, the
+  // suffix is stripped from `key` and `ifexists` is set; otherwise the
+  // whole text becomes the key.
+  Condition(TokenID op, const char* s, std::size_t len) : op(op) {
+    static constexpr char ifexistr[] = "IfExists";
+    auto l = static_cast<const char*>(memmem(static_cast<const void*>(s), len,
+                                             static_cast<const void*>(ifexistr),
+                                             sizeof(ifexistr) -1));
+    if (l && ((l + sizeof(ifexistr) - 1 == (s + len)))) {
+      ifexists = true;
+      key.assign(s, l - s);
+    } else {
+      key.assign(s, len);
+    }
+  }
+
+  // Evaluates this condition against the environment; defined out of line.
+  bool eval(const Environment& e) const;
+
+  // Parses `s` as a double.  Returns none unless the whole string is
+  // consumed by the conversion.
+  static boost::optional<double> as_number(const std::string& s) {
+    std::size_t p = 0;
+
+    try {
+      double d = std::stod(s, &p);
+      if (p < s.length()) {
+        return boost::none;
+      }
+
+      return d;
+    } catch (const std::logic_error&) {
+      // std::stod reports both "not a number" and "out of range" with
+      // exceptions derived from std::logic_error.
+      return boost::none;
+    }
+  }
+
+  // Parses `s` as a point in time.  A string that is entirely a number
+  // is taken as a count of seconds, with the fractional part converted
+  // to nanoseconds; anything else is handed to from_iso_8601.
+  static boost::optional<ceph::real_time> as_date(const std::string& s) {
+    std::size_t p = 0;
+
+    try {
+      double d = std::stod(s, &p);
+      if (p == s.length()) {
+        return ceph::real_time(
+          std::chrono::seconds(static_cast<uint64_t>(d)) +
+          std::chrono::nanoseconds(
+            static_cast<uint64_t>((d - static_cast<uint64_t>(d))
+                                  * 1000000000)));
+      }
+
+      return from_iso_8601(boost::string_ref(s), false);
+    } catch (const std::logic_error&) {
+      return boost::none;
+    }
+  }
+
+  // Interprets `s` as a boolean.  The empty string, "false" (in any
+  // letter case), a numeric zero and a numeric NaN are false; every
+  // other string is true.
+  static boost::optional<bool> as_bool(const std::string& s) {
+    if (s.empty() || boost::iequals(s, "false")) {
+      return false;
+    }
+
+    try {
+      std::size_t p = 0;
+      double d = std::stod(s, &p);
+      if (p == s.length()) {
+        // Compare, never assign: `d == 0.0` is true for both +0.0 and
+        // -0.0, and d must be left intact for the NaN check.
+        return !((d == 0.0) || std::isnan(d));
+      }
+    } catch (const std::logic_error&) {
+      // Fallthrough
+    }
+
+    return true;
+  }
+
+  // Decodes `s` as base64.  Returns none when the input is malformed.
+  static boost::optional<ceph::bufferlist> as_binary(const std::string& s) {
+    // In a just world
+    ceph::bufferlist base64;
+    // I could populate a bufferlist
+    base64.push_back(buffer::create_static(
+                       s.length(),
+                       const_cast<char*>(s.data()))); // Yuck
+    // From a base64 encoded std::string.
+    ceph::bufferlist bin;
+
+    try {
+      base64.decode_base64(bin);
+    } catch (const ceph::buffer::malformed_input&) {
+      return boost::none;
+    }
+    return bin;
+  }
+
+  // Parses `s` as a masked IP address; defined out of line.
+  static boost::optional<MaskedIP> as_network(const std::string& s);
+
+
+  // Case-insensitive string equality predicate.
+  struct ci_equal_to : public std::binary_function<const std::string,
+                                                   const std::string,
+                                                   bool> {
+    bool operator ()(const std::string& s1,
+                     const std::string& s2) const {
+      return boost::iequals(s1, s2);
+    }
+  };
+
+
+  // Returns true if `f(c, d)` holds for at least one `d` in `v`.
+  template<typename F>
+  static bool orrible(F&& f, const std::string& c,
+                      const std::vector<std::string>& v) {
+    for (const auto& d : v) {
+      // Call f as an lvalue: it is invoked once per element, so it
+      // must not be forwarded (and potentially moved from) each time.
+      if (f(c, d)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Converts `c` and each element of `v` with `x`, then returns true if
+  // `f` holds between the converted `c` and at least one converted
+  // element.  Returns false when `c` does not convert; elements of `v`
+  // that do not convert are skipped.
+  template<typename F, typename X>
+  static bool shortible(F&& f, X& x, const std::string& c,
+                        const std::vector<std::string>& v) {
+    auto xc = x(c);
+    if (!xc) {
+      return false;
+    }
+
+    for (const auto& d : v) {
+      auto xd = x(d);
+      if (!xd) {
+        continue;
+      }
+
+      // As in orrible, f is reused across iterations and so is called
+      // as an lvalue rather than forwarded.
+      if (f(*xc, *xd)) {
+        return true;
+      }
+    }
+    return false;
+  }
+};
+
+std::ostream& operator <<(std::ostream& m, const Condition& c);
+
+std::string to_string(const Condition& c);
+
+// A single statement of a policy: an effect plus the principals,
+// actions, resources and conditions that it is declared with.
+struct Statement {
+ // Optional statement id.
+ boost::optional<std::string> sid = boost::none;
+
+ // Principal / NotPrincipal sets.
+ boost::container::flat_set<rgw::auth::Principal> princ;
+ boost::container::flat_set<rgw::auth::Principal> noprinc;
+
+ // Every statement MUST provide an effect. I just initialize it to
+ // deny as defensive programming.
+ Effect effect = Effect::Deny;
+
+ // NOTE(review): presumably bitmasks built from the s3* action
+ // constants (Action / NotAction) -- confirm against the parser.
+ std::uint64_t action = 0;
+ std::uint64_t notaction = 0;
+
+ // Resource / NotResource sets.
+ boost::container::flat_set<ARN> resource;
+ boost::container::flat_set<ARN> notresource;
+
+ std::vector<Condition> conditions;
+
+ // Evaluates this statement for the given environment, identity,
+ // action and resource; defined out of line.
+ Effect eval(const Environment& e,
+ boost::optional<const rgw::auth::Identity&> ida,
+ std::uint64_t action, const ARN& resource) const;
+};
+
+// Text output for Statement; both are defined out of line.  The stream
+// parameter is spelled `std::ostream` so this header does not depend on
+// a `using namespace std` leaking in from another header.
+std::ostream& operator <<(std::ostream& m, const Statement& s);
+std::string to_string(const Statement& s);
+
+// Exception describing a failed policy parse.  It keeps the rapidjson
+// parse result so that callers can inspect it.
+struct PolicyParseException : public std::exception {
+  rapidjson::ParseResult pr;
+
+  PolicyParseException(rapidjson::ParseResult&& result)
+    : pr(result) { }
+  // Returns rapidjson's English message for the stored error code.
+  const char* what() const noexcept override {
+    const auto code = pr.Code();
+    return rapidjson::GetParseError_En(code);
+  }
+};
+
+// A parsed policy document: the original text, its version, an optional
+// id and the list of statements.
+struct Policy {
+ // The policy document as text.
+ std::string text;
+ // Defaults to 2008-10-17 until set otherwise.
+ Version version = Version::v2008_10_17;
+ boost::optional<std::string> id = boost::none;
+
+ std::vector<Statement> statements;
+
+ // Parses `text` into this policy; defined out of line.
+ Policy(CephContext* cct, const std::string& tenant,
+ const bufferlist& text);
+
+ // Evaluates the policy for the given environment, identity, action
+ // and resource; defined out of line.
+ Effect eval(const Environment& e,
+ boost::optional<const rgw::auth::Identity&> ida,
+ std::uint64_t action, const ARN& resource) const;
+};
+
+// Text output for Policy; both are defined out of line.  The stream
+// parameter is spelled `std::ostream` so this header does not depend on
+// a `using namespace std` leaking in from another header.
+std::ostream& operator <<(std::ostream& m, const Policy& p);
+std::string to_string(const Policy& p);
+}
+}
+
+namespace std {
+// Hash support for rgw::IAM::Service: the enumerator is converted to
+// int and hashed with std::hash<int>.
+template<>
+struct hash<::rgw::IAM::Service> {
+  size_t operator()(const ::rgw::IAM::Service& s) const noexcept {
+    const int as_int = static_cast<int>(s);
+    return hash<int>{}(as_int);
+  }
+};
+}
+
+#endif
diff --git a/src/rgw/rgw_iam_policy_keywords.gperf b/src/rgw/rgw_iam_policy_keywords.gperf
new file mode 100644
index 00000000000..d37fa6aff65
--- /dev/null
+++ b/src/rgw/rgw_iam_policy_keywords.gperf
@@ -0,0 +1,127 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+%language=C++
+%define class-name keyword_hash
+%define lookup-function-name lookup
+%struct-type
+struct Keyword {
+ const char* name;
+ TokenKind kind;
+ TokenID id;
+ uint64_t specific;
+ bool arrayable;
+ bool objectable;
+};
+%%
+# Top-level
+#
+Version, TokenKind::top, TokenID::Version, 0, false, false
+Id, TokenKind::top, TokenID::Id, 0, false, false
+Statement, TokenKind::top, TokenID::Statement, 0, true, true
+#
+# Statement level
+#
+Sid, TokenKind::statement, TokenID::Sid, 0, false, false
+Effect, TokenKind::statement, TokenID::Effect, 0, false, false
+Principal, TokenKind::statement, TokenID::Principal, 0, false, true
+NotPrincipal, TokenKind::statement, TokenID::NotPrincipal, 0, true, true
+Action, TokenKind::statement, TokenID::Action, 0, true, false
+NotAction, TokenKind::statement, TokenID::NotAction, 0, true, false
+Resource, TokenKind::statement, TokenID::Resource, 0, true, false
+NotResource, TokenKind::statement, TokenID::NotResource, 0, true, false
+Condition, TokenKind::statement, TokenID::Condition, 0, true, true
+#
+# Condition operators
+#
+# String
+StringEquals, TokenKind::cond_op, TokenID::StringEquals, (uint64_t) Type::string, true, true
+StringNotEquals, TokenKind::cond_op, TokenID::StringNotEquals, (uint64_t) Type::string, true, true
+StringEqualsIgnoreCase, TokenKind::cond_op, TokenID::StringEqualsIgnoreCase, (uint64_t) Type::string, true, true
+StringNotEqualsIgnoreCase, TokenKind::cond_op, TokenID::StringNotEqualsIgnoreCase, (uint64_t) Type::string, true, true
+StringLike, TokenKind::cond_op, TokenID::StringLike, (uint64_t) Type::string, true, true
+StringNotLike, TokenKind::cond_op, TokenID::StringNotLike, (uint64_t) Type::string, true, true
+# Numeric
+NumericEquals, TokenKind::cond_op, TokenID::NumericEquals, (uint64_t) Type::number, true, true
+NumericNotEquals, TokenKind::cond_op, TokenID::NumericNotEquals, (uint64_t) Type::number, true, true
+NumericLessThan, TokenKind::cond_op, TokenID::NumericLessThan, (uint64_t) Type::number, true, true
+NumericLessThanEquals, TokenKind::cond_op, TokenID::NumericLessThanEquals, (uint64_t) Type::number, true, true
+NumericGreaterThan, TokenKind::cond_op, TokenID::NumericGreaterThan, (uint64_t) Type::number, true, true
+NumericGreaterThanEquals, TokenKind::cond_op, TokenID::NumericGreaterThanEquals, (uint64_t) Type::number, true, true
+# Date
+DateEquals, TokenKind::cond_op, TokenID::DateEquals, (uint64_t) Type::date, true, true
+DateNotEquals, TokenKind::cond_op, TokenID::DateNotEquals, (uint64_t) Type::date, true, true
+DateLessThan, TokenKind::cond_op, TokenID::DateLessThan, (uint64_t) Type::date, true, true
+DateLessThanEquals, TokenKind::cond_op, TokenID::DateLessThanEquals, (uint64_t) Type::date, true, true
+DateGreaterThan, TokenKind::cond_op, TokenID::DateGreaterThan, (uint64_t) Type::date, true, true
+DateGreaterThanEquals, TokenKind::cond_op, TokenID::DateGreaterThanEquals, (uint64_t) Type::date, true, true
+# Bool
+Bool, TokenKind::cond_op, TokenID::Bool, (uint64_t) Type::boolean, true, true
+# Binary
+BinaryEquals, TokenKind::cond_op, TokenID::BinaryEquals, (uint64_t) Type::binary, true, true
+# IP Address
+IpAddress, TokenKind::cond_op, TokenID::IpAddress, (uint64_t) Type::ipaddr, true, true
+NotIpAddress, TokenKind::cond_op, TokenID::NotIpAddress, (uint64_t) Type::ipaddr, true, true
+# Amazon Resource Names
+ArnEquals, TokenKind::cond_op, TokenID::ArnEquals, (uint64_t) Type::arn, true, true
+ArnNotEquals, TokenKind::cond_op, TokenID::ArnNotEquals, (uint64_t) Type::arn, true, true
+ArnLike, TokenKind::cond_op, TokenID::ArnLike, (uint64_t) Type::arn, true, true
+ArnNotLike, TokenKind::cond_op, TokenID::ArnNotLike, (uint64_t) Type::arn, true, true
+# Null
+Null, TokenKind::cond_op, TokenID::Null, (uint64_t) Type::null, true, true
+#
+# Condition keys
+#
+# AWS
+#aws:CurrentTime, TokenKind::cond_key, TokenID::awsCurrentTime, (uint64_t) Type::date, true, false
+#aws:EpochTime, TokenKind::cond_key, TokenID::awsEpochTime, (uint64_t) Type::date, true, false
+#aws:TokenIssueTime, TokenKind::cond_key, TokenID::awsTokenIssueTime, (uint64_t) Type::date, true, false
+#aws:MultiFactorAuthPresent, TokenKind::cond_key, TokenID::awsMultiFactorAuthPresent, (uint64_t) Type::boolean, true, false
+#aws:MultiFactorAuthAge, TokenKind::cond_key, TokenID::awsMultiFactorAuthAge, (uint64_t) Type::number, true, false
+#aws:PrincipalType, TokenKind::cond_key, TokenID::awsPrincipalType, (uint64_t) Type::string, true, false
+#aws:Referer, TokenKind::cond_key, TokenID::awsReferer, (uint64_t) Type::string, true, false
+#aws:SecureTransport, TokenKind::cond_key, TokenID::awsSecureTransport, (uint64_t) Type::boolean, true, false
+#aws:SourceArn, TokenKind::cond_key, TokenID::awsSourceArn, (uint64_t) Type::arn, true, false
+#aws:SourceIp, TokenKind::cond_key, TokenID::awsSourceIp, (uint64_t) Type::ipaddr, true, false
+#aws:SourceVpc, TokenKind::cond_key, TokenID::awsSourceVpc, (uint64_t) Type::string, true, false
+#aws:SourceVpce, TokenKind::cond_key, TokenID::awsSourceVpce, (uint64_t) Type::string, true, false
+#aws:UserAgent, TokenKind::cond_key, TokenID::awsUserAgent, (uint64_t) Type::string, true, false
+#aws:userid, TokenKind::cond_key, TokenID::awsuserid, (uint64_t) Type::string, true, false
+#aws:username, TokenKind::cond_key, TokenID::awsusername, (uint64_t) Type::string, true, false
+# S3
+#s3:x-amz-acl, TokenKind::cond_key, TokenID::s3x_amz_acl, (uint64_t) Type::string, true, false
+#s3:x-amz-grant-read, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-write, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-read-acp, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-write-acp, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-full-control, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-copy-source, TokenKind::cond_key, TokenID::s3x_amz_copy_source, (uint64_t) Type::string, true, false
+#s3:x-amz-server-side-encryption, TokenKind::cond_key, TokenID::s3x_amz_server_side_encryption, (uint64_t) Type::boolean, true, false
+#s3:x-amz-server-side-encryption-aws-kms-key-id, TokenKind::cond_key, TokenID::s3x_amz_server_side_encryption_aws_kms_key_id, (uint64_t) Type::arn, true, false
+#s3:x-amz-metadata-directive, TokenKind::cond_key, TokenID::s3x_amz_metadata_directive, (uint64_t) Type::string, true, false
+#s3:x-amz-storage-class, TokenKind::cond_key, TokenID::s3x_amz_storage_class, (uint64_t) Type::string, true, false
+#s3:VersionId, TokenKind::cond_key, TokenID::s3VersionId, (uint64_t) Type::string, true, false
+#s3:LocationConstraint, TokenKind::cond_key, TokenID::s3LocationConstraint, (uint64_t) Type::string, true, false
+#s3:prefix, TokenKind::cond_key, TokenID::s3prefix, (uint64_t) Type::string, true, false
+#s3:delimiter, TokenKind::cond_key, TokenID::s3delimiter, (uint64_t) Type::string, true, false
+#s3:max-keys, TokenKind::cond_key, TokenID::s3max_keys, (uint64_t) Type::number, true, false
+#s3:signatureversion, TokenKind::cond_key, TokenID::s3signatureversion, (uint64_t) Type::string, true, false
+#s3:authType, TokenKind::cond_key, TokenID::s3authType, (uint64_t) Type::string, true, false
+#s3:signatureAge, TokenKind::cond_key, TokenID::s3signatureAge, (uint64_t) Type::number, true, false
+#s3:x-amz-content-sha256, TokenKind::cond_key, TokenID::s3x_amz_content_sha256, (uint64_t) Type::string, true, false
+#
+# Version Keywords
+#
+2008-10-17, TokenKind::version_key, TokenID::v2008_10_17, (uint64_t) Version::v2008_10_17, false, false
+2012-10-17, TokenKind::version_key, TokenID::v2012_10_17, (uint64_t) Version::v2012_10_17, false, false
+#
+# Effect Keywords
+#
+Allow, TokenKind::effect_key, TokenID::Allow, (uint64_t) Effect::Allow, false, false
+Deny, TokenKind::effect_key, TokenID::Deny, (uint64_t) Effect::Deny, false, false
+#
+# Principal types
+#
+AWS, TokenKind::princ_type, TokenID::AWS, 0, true, false
+Federated, TokenKind::princ_type, TokenID::Federated, 0, true, false
+Service, TokenKind::princ_type, TokenID::Service, 0, true, false
+CanonicalUser, TokenKind::princ_type, TokenID::CanonicalUser, 0, true, false
diff --git a/src/rgw/rgw_iam_policy_keywords.h b/src/rgw/rgw_iam_policy_keywords.h
new file mode 100644
index 00000000000..a0cd34b6286
--- /dev/null
+++ b/src/rgw/rgw_iam_policy_keywords.h
@@ -0,0 +1,139 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_POLICY_S3V2_KEYWORDS_H
+#define CEPH_RGW_POLICY_S3V2_KEYWORDS_H
+
+namespace rgw {
+namespace IAM {
+
+// Category of a policy keyword.  The gperf keyword table assigns one of
+// these to every entry.
+enum class TokenKind {
+ pseudo, top, statement, cond_op, cond_key, version_key, effect_key,
+ princ_type
+};
+
+// Identifier of each individual policy keyword.  The gperf keyword
+// table maps keyword text to one of these values.
+enum class TokenID {
+ /// Pseudo-token
+ Top,
+
+ /// Top-level tokens
+ Version, Id, Statement,
+
+ /// Statement level tokens
+ Sid, Effect, Principal, NotPrincipal, Action, NotAction,
+ Resource, NotResource, Condition,
+
+ /// Condition Operators!
+ /// Any of these, except Null, can have an IfExists variant.
+
+ // String!
+ StringEquals, StringNotEquals, StringEqualsIgnoreCase,
+ StringNotEqualsIgnoreCase, StringLike, StringNotLike,
+
+ // Numeric!
+ NumericEquals, NumericNotEquals, NumericLessThan, NumericLessThanEquals,
+ NumericGreaterThan, NumericGreaterThanEquals,
+
+ // Date!
+ DateEquals, DateNotEquals, DateLessThan, DateLessThanEquals,
+ DateGreaterThan, DateGreaterThanEquals,
+
+ // Bool!
+ Bool,
+
+ // Binary!
+ BinaryEquals,
+
+ // IP Address!
+ IpAddress, NotIpAddress,
+
+ // Amazon Resource Names! (Does S3 need this?)
+ ArnEquals, ArnNotEquals, ArnLike, ArnNotLike,
+
+ // Null!
+ Null,
+
+#if 0 // Keys are done at runtime now
+
+ /// Condition Keys!
+ awsCurrentTime,
+ awsEpochTime,
+ awsTokenIssueTime,
+ awsMultiFactorAuthPresent,
+ awsMultiFactorAuthAge,
+ awsPrincipalType,
+ awsReferer,
+ awsSecureTransport,
+ awsSourceArn,
+ awsSourceIp,
+ awsSourceVpc,
+ awsSourceVpce,
+ awsUserAgent,
+ awsuserid,
+ awsusername,
+ s3x_amz_acl,
+ s3x_amz_grant_permission,
+ s3x_amz_copy_source,
+ s3x_amz_server_side_encryption,
+ s3x_amz_server_side_encryption_aws_kms_key_id,
+ s3x_amz_metadata_directive,
+ s3x_amz_storage_class,
+ s3VersionId,
+ s3LocationConstraint,
+ s3prefix,
+ s3delimiter,
+ s3max_keys,
+ s3signatureversion,
+ s3authType,
+ s3signatureAge,
+ s3x_amz_content_sha256,
+#else
+ // Single stand-in for all condition keys, since the per-key
+ // enumerators above are compiled out.
+ CondKey,
+#endif
+
+ ///
+ /// Versions!
+ ///
+ v2008_10_17,
+ v2012_10_17,
+
+ ///
+ /// Effects!
+ ///
+ Allow,
+ Deny,
+
+ /// Principal Types!
+ AWS,
+ Federated,
+ Service,
+ CanonicalUser
+};
+
+
+// Policy language versions, named after their date strings
+// ("2008-10-17" and "2012-10-17").
+enum class Version {
+ v2008_10_17,
+ v2012_10_17
+};
+
+
+// Result of evaluating a statement or policy.  Allow and Deny are also
+// the two effect keywords of the policy language; Pass has no keyword
+// and is what policy evaluation yields when no statement allowed or
+// denied the request.
+enum class Effect {
+ Allow,
+ Deny,
+ Pass
+};
+
+// Value type associated with a condition operator.  The gperf keyword
+// table stores one of these (cast to uint64_t) for each operator.
+enum class Type {
+ string,
+ number,
+ date,
+ boolean,
+ binary,
+ ipaddr,
+ arn,
+ null
+};
+}
+}
+
+#endif // CEPH_RGW_POLICY_S3V2_KEYWORDS_H
diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc
index 067ad6fadbb..2a183b59195 100644
--- a/src/rgw/rgw_json_enc.cc
+++ b/src/rgw/rgw_json_enc.cc
@@ -1290,6 +1290,7 @@ void rgw_meta_sync_marker::decode_json(JSONObj *obj)
utime_t ut;
JSONDecoder::decode_json("timestamp", ut, obj);
timestamp = ut.to_real_time();
+ JSONDecoder::decode_json("realm_epoch", realm_epoch, obj);
}
void rgw_meta_sync_marker::dump(Formatter *f) const
@@ -1300,6 +1301,7 @@ void rgw_meta_sync_marker::dump(Formatter *f) const
encode_json("total_entries", total_entries, f);
encode_json("pos", pos, f);
encode_json("timestamp", utime_t(timestamp), f);
+ encode_json("realm_epoch", realm_epoch, f);
}
void rgw_meta_sync_status::decode_json(JSONObj *obj)
diff --git a/src/rgw/rgw_keystone.cc b/src/rgw/rgw_keystone.cc
index 933308240e2..3294380ba10 100644
--- a/src/rgw/rgw_keystone.cc
+++ b/src/rgw/rgw_keystone.cc
@@ -613,7 +613,7 @@ int TokenCache::RevokeThread::check_revoked()
bool TokenCache::going_down() const
{
- return (down_flag.read() != 0);
+ return down_flag;
}
void* TokenCache::RevokeThread::entry()
diff --git a/src/rgw/rgw_keystone.h b/src/rgw/rgw_keystone.h
index 5bacfb0da5e..df5650c5927 100644
--- a/src/rgw/rgw_keystone.h
+++ b/src/rgw/rgw_keystone.h
@@ -13,6 +13,8 @@
#include "rgw_http_client.h"
#include "common/Cond.h"
+#include <atomic>
+
int rgw_open_cms_envelope(CephContext *cct,
const std::string& src,
std::string& dst); /* out */
@@ -216,7 +218,7 @@ class TokenCache {
list<string>::iterator lru_iter;
};
- atomic_t down_flag;
+ std::atomic<bool> down_flag = { false };
class RevokeThread : public Thread {
friend class TokenCache;
@@ -271,7 +273,7 @@ class TokenCache {
}
~TokenCache() {
- down_flag.set(1);
+ down_flag = true;
revocator.stop();
revocator.join();
diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc
index 61d4cc94427..b70d2596f26 100644
--- a/src/rgw/rgw_lc.cc
+++ b/src/rgw/rgw_lc.cc
@@ -2,11 +2,13 @@
#include <iostream>
#include <map>
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string.hpp>
+
#include "common/Formatter.h"
#include <common/errno.h>
#include "auth/Crypto.h"
#include "cls/rgw/cls_rgw_client.h"
-#include "cls/refcount/cls_refcount_client.h"
#include "cls/lock/cls_lock_client.h"
#include "rgw_common.h"
#include "rgw_bucket.h"
@@ -198,21 +200,6 @@ bool RGWLC::if_already_run_today(time_t& start_date)
return false;
}
-static std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
- std::stringstream ss(s);
- std::string item;
- while (std::getline(ss, item, delim)) {
- elems.push_back(item);
- }
- return elems;
-}
-
-static std::vector<std::string> split(const std::string &s, char delim) {
- std::vector<std::string> elems;
- split(s, delim, elems);
- return elems;
-}
-
int RGWLC::bucket_lc_prepare(int index)
{
map<string, int > entries;
@@ -324,7 +311,7 @@ int RGWLC::bucket_lc_process(string& shard_id)
vector<rgw_bucket_dir_entry> objs;
RGWObjectCtx obj_ctx(store);
vector<std::string> result;
- result = split(shard_id, ':');
+ boost::split(result, shard_id, boost::is_any_of(":"));
string bucket_tenant = result[0];
string bucket_name = result[1];
string bucket_id = result[2];
@@ -496,8 +483,7 @@ int RGWLC::bucket_lc_process(string& shard_id)
return ret;
}
-int RGWLC::bucket_lc_post(int index, int max_lock_sec, cls_rgw_lc_obj_head& head,
- pair<string, int >& entry, int& result)
+int RGWLC::bucket_lc_post(int index, int max_lock_sec, pair<string, int >& entry, int& result)
{
utime_t lock_duration(cct->_conf->rgw_lc_lock_max_time, 0);
@@ -637,7 +623,7 @@ int RGWLC::process(int index, int max_lock_secs)
}
l.unlock(&store->lc_pool_ctx, obj_names[index]);
ret = bucket_lc_process(entry.first);
- ret = bucket_lc_post(index, max_lock_secs, head, entry, ret);
+ bucket_lc_post(index, max_lock_secs, entry, ret);
return 0;
exit:
l.unlock(&store->lc_pool_ctx, obj_names[index]);
@@ -655,7 +641,7 @@ void RGWLC::start_processor()
void RGWLC::stop_processor()
{
- down_flag.set(1);
+ down_flag = true;
if (worker) {
worker->stop();
worker->join();
@@ -672,7 +658,7 @@ void RGWLC::LCWorker::stop()
bool RGWLC::going_down()
{
- return (down_flag.read() != 0);
+ return down_flag;
}
bool RGWLC::LCWorker::should_work(utime_t& now)
diff --git a/src/rgw/rgw_lc.h b/src/rgw/rgw_lc.h
index 61edd78aac7..480ba3fe513 100644
--- a/src/rgw/rgw_lc.h
+++ b/src/rgw/rgw_lc.h
@@ -9,7 +9,6 @@
#include "common/debug.h"
#include "include/types.h"
-#include "include/atomic.h"
#include "include/rados/librados.hpp"
#include "common/Mutex.h"
#include "common/Cond.h"
@@ -19,7 +18,8 @@
#include "rgw_multi.h"
#include "cls/rgw/cls_rgw_types.h"
-using namespace std;
+#include <atomic>
+
#define HASH_PRIME 7877
#define MAX_ID_LEN 255
static string lc_oid_prefix = "lc";
@@ -227,7 +227,7 @@ class RGWLC {
RGWRados *store;
int max_objs;
string *obj_names;
- atomic_t down_flag;
+ std::atomic<bool> down_flag = { false };
string cookie;
class LCWorker : public Thread {
@@ -261,8 +261,7 @@ class RGWLC {
int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
int bucket_lc_prepare(int index);
int bucket_lc_process(string& shard_id);
- int bucket_lc_post(int index, int max_lock_sec, cls_rgw_lc_obj_head& head,
- pair<string, int >& entry, int& result);
+ int bucket_lc_post(int index, int max_lock_sec, pair<string, int >& entry, int& result);
bool going_down();
void start_processor();
void stop_processor();
diff --git a/src/rgw/rgw_lc_s3.h b/src/rgw/rgw_lc_s3.h
index c969ac979c9..52f6987e487 100644
--- a/src/rgw/rgw_lc_s3.h
+++ b/src/rgw/rgw_lc_s3.h
@@ -6,16 +6,10 @@
#include <iostream>
#include <include/types.h>
-#include <expat.h>
-
#include "include/str_list.h"
#include "rgw_lc.h"
#include "rgw_xml.h"
-
-
-using namespace std;
-
class LCID_S3 : public XMLObj
{
public:
diff --git a/src/rgw/rgw_ldap.cc b/src/rgw/rgw_ldap.cc
index d07b295153e..a39afa6c081 100644
--- a/src/rgw/rgw_ldap.cc
+++ b/src/rgw/rgw_ldap.cc
@@ -94,6 +94,7 @@ namespace rgw {
if (ret != LDAP_SUCCESS) {
ldout(g_ceph_context, 10)
<< __func__ << " simple_bind failed uid=" << uid
+ << " ldap err=" << ret  // leading space keeps this apart from the uid value
<< dendl;
}
ldap_memfree(dn);
diff --git a/src/rgw/rgw_loadgen_process.cc b/src/rgw/rgw_loadgen_process.cc
index 7f003facbb7..23e6fefca73 100644
--- a/src/rgw/rgw_loadgen_process.cc
+++ b/src/rgw/rgw_loadgen_process.cc
@@ -13,6 +13,8 @@
#include "rgw_loadgen.h"
#include "rgw_client_io.h"
+#include <atomic>
+
#define dout_subsys ceph_subsys_rgw
extern void signal_shutdown();
@@ -37,7 +39,7 @@ void RGWLoadGenProcess::run()
vector<string> buckets(num_buckets);
- atomic_t failed;
+ std::atomic<long int> failed = { 0 };
for (i = 0; i < num_buckets; i++) {
buckets[i] = "/loadgen";
@@ -51,7 +53,7 @@ void RGWLoadGenProcess::run()
string *objs = new string[num_objs];
- if (failed.read()) {
+ if (failed) {
derr << "ERROR: bucket creation failed" << dendl;
goto done;
}
@@ -69,7 +71,7 @@ void RGWLoadGenProcess::run()
checkpoint();
- if (failed.read()) {
+ if (failed) {
derr << "ERROR: bucket creation failed" << dendl;
goto done;
}
@@ -102,7 +104,7 @@ done:
void RGWLoadGenProcess::gen_request(const string& method,
const string& resource,
- int content_length, atomic_t* fail_flag)
+ int content_length, std::atomic<long int>* fail_flag)
{
RGWLoadGenRequest* req =
new RGWLoadGenRequest(store->get_new_req_id(), method, resource,
@@ -138,7 +140,7 @@ void RGWLoadGenProcess::handle_request(RGWRequest* r)
dout(20) << "process_request() returned " << ret << dendl;
if (req->fail_flag) {
- req->fail_flag->inc();
+ (*req->fail_flag)++; // bump the shared atomic counter; `req->fail_flag++` would only advance the pointer
}
}
diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc
index 153ddb060d7..51dbd1bbe93 100644
--- a/src/rgw/rgw_log.cc
+++ b/src/rgw/rgw_log.cc
@@ -218,7 +218,7 @@ static void log_usage(struct req_state *s, const string& op_name)
rgw_usage_data data(bytes_sent, bytes_received);
data.ops = 1;
- if (!error)
+ if (!s->is_err())
data.successful_ops = 1;
entry.add(op_name, data);
diff --git a/src/rgw/rgw_log.h b/src/rgw/rgw_log.h
index 5e612b7ca22..25e99e096de 100644
--- a/src/rgw/rgw_log.h
+++ b/src/rgw/rgw_log.h
@@ -3,7 +3,6 @@
#ifndef CEPH_RGW_LOG_H
#define CEPH_RGW_LOG_H
-
#include <boost/container/flat_map.hpp>
#include "rgw_common.h"
#include "include/utime.h"
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 1bec56553a6..01ef5556c22 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -53,13 +53,14 @@
#include "rgw_request.h"
#include "rgw_process.h"
#include "rgw_frontend.h"
-#if defined(WITH_RADOSGW_ASIO_FRONTEND)
+#if defined(WITH_RADOSGW_BEAST_FRONTEND)
#include "rgw_asio_frontend.h"
-#endif /* WITH_RADOSGW_ASIO_FRONTEND */
+#endif /* WITH_RADOSGW_BEAST_FRONTEND */
#include <map>
#include <string>
#include <vector>
+#include <atomic>
#include "include/types.h"
#include "common/BackTrace.h"
@@ -77,11 +78,11 @@ static sig_t sighandler_alrm;
class RGWProcess;
static int signal_fd[2] = {0, 0};
-static atomic_t disable_signal_fd;
+static std::atomic<int64_t> disable_signal_fd = { 0 };
void signal_shutdown()
{
- if (!disable_signal_fd.read()) {
+ if (!disable_signal_fd) {
int val = 0;
int ret = write(signal_fd[0], (char *)&val, sizeof(val));
if (ret < 0) {
@@ -470,8 +471,6 @@ int main(int argc, const char **argv)
RGWFrontend *fe = NULL;
if (framework == "civetweb" || framework == "mongoose") {
- int port;
- config->get_val("port", 80, &port);
std::string uri_prefix;
config->get_val("prefix", "", &uri_prefix);
@@ -489,9 +488,9 @@ int main(int argc, const char **argv)
fe = new RGWLoadGenFrontend(env, config);
}
-#if defined(WITH_RADOSGW_ASIO_FRONTEND)
- else if ((framework == "asio") &&
- cct->check_experimental_feature_enabled("rgw-asio-frontend")) {
+#if defined(WITH_RADOSGW_BEAST_FRONTEND)
+ else if ((framework == "beast") &&
+ cct->check_experimental_feature_enabled("rgw-beast-frontend")) {
int port;
config->get_val("port", 80, &port);
std::string uri_prefix;
@@ -499,7 +498,7 @@ int main(int argc, const char **argv)
RGWProcessEnv env{ store, &rest, olog, port, uri_prefix, auth_registry };
fe = new RGWAsioFrontend(env);
}
-#endif /* WITH_RADOSGW_ASIO_FRONTEND */
+#endif /* WITH_RADOSGW_BEAST_FRONTEND */
#if defined(WITH_RADOSGW_FCGI_FRONTEND)
else if (framework == "fastcgi" || framework == "fcgi") {
std::string uri_prefix;
diff --git a/src/rgw/rgw_meta_sync_status.h b/src/rgw/rgw_meta_sync_status.h
index e913e8ffb21..e34bb05be1d 100644
--- a/src/rgw/rgw_meta_sync_status.h
+++ b/src/rgw/rgw_meta_sync_status.h
@@ -55,28 +55,33 @@ struct rgw_meta_sync_marker {
uint64_t total_entries;
uint64_t pos;
real_time timestamp;
+ epoch_t realm_epoch{0}; //< realm_epoch of period marker
rgw_meta_sync_marker() : state(FullSync), total_entries(0), pos(0) {}
void encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 1, bl);
::encode(state, bl);
::encode(marker, bl);
::encode(next_step_marker, bl);
::encode(total_entries, bl);
::encode(pos, bl);
::encode(timestamp, bl);
+ ::encode(realm_epoch, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& bl) {
- DECODE_START(1, bl);
+ DECODE_START(2, bl);
::decode(state, bl);
::decode(marker, bl);
::decode(next_step_marker, bl);
::decode(total_entries, bl);
::decode(pos, bl);
::decode(timestamp, bl);
+ if (struct_v >= 2) {
+ ::decode(realm_epoch, bl);
+ }
DECODE_FINISH(bl);
}
diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc
index e26201bc3be..0a0f3dec324 100644
--- a/src/rgw/rgw_metadata.cc
+++ b/src/rgw/rgw_metadata.cc
@@ -313,28 +313,6 @@ public:
static RGWMetadataTopHandler md_top_handler;
-static const std::string mdlog_history_oid = "meta.history";
-
-struct RGWMetadataLogHistory {
- epoch_t oldest_realm_epoch;
- std::string oldest_period_id;
-
- void encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
- ::encode(oldest_realm_epoch, bl);
- ::encode(oldest_period_id, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::iterator& p) {
- DECODE_START(1, p);
- ::decode(oldest_realm_epoch, p);
- ::decode(oldest_period_id, p);
- DECODE_FINISH(p);
- }
-};
-WRITE_CLASS_ENCODER(RGWMetadataLogHistory)
-
-
RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store)
: cct(_cct), store(_store)
{
@@ -351,15 +329,18 @@ RGWMetadataManager::~RGWMetadataManager()
handlers.clear();
}
+const std::string RGWMetadataLogHistory::oid = "meta.history";
+
namespace {
-int read_history(RGWRados *store, RGWMetadataLogHistory *state)
+int read_history(RGWRados *store, RGWMetadataLogHistory *state,
+ RGWObjVersionTracker *objv_tracker)
{
RGWObjectCtx ctx{store};
auto& pool = store->get_zone_params().log_pool;
- const auto& oid = mdlog_history_oid;
+ const auto& oid = RGWMetadataLogHistory::oid;
bufferlist bl;
- int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, nullptr, nullptr);
+ int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, objv_tracker, nullptr);
if (ret < 0) {
return ret;
}
@@ -375,19 +356,141 @@ int read_history(RGWRados *store, RGWMetadataLogHistory *state)
}
int write_history(RGWRados *store, const RGWMetadataLogHistory& state,
- bool exclusive = false)
+ RGWObjVersionTracker *objv_tracker, bool exclusive = false)
{
bufferlist bl;
state.encode(bl);
auto& pool = store->get_zone_params().log_pool;
- const auto& oid = mdlog_history_oid;
+ const auto& oid = RGWMetadataLogHistory::oid;
return rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(),
- exclusive, nullptr, real_time{});
+ exclusive, objv_tracker, real_time{});
}
using Cursor = RGWPeriodHistory::Cursor;
+/// read the mdlog history and use it to initialize the given cursor
+class ReadHistoryCR : public RGWCoroutine {
+ RGWRados *store;
+ Cursor *cursor; //< out: receives the period history lookup result
+ RGWObjVersionTracker *objv_tracker; //< forwarded to the rados read
+ RGWMetadataLogHistory state; //< decoded contents of the history object
+ public:
+ ReadHistoryCR(RGWRados *store, Cursor *cursor,
+ RGWObjVersionTracker *objv_tracker)
+ : RGWCoroutine(store->ctx()), store(store), cursor(cursor),
+ objv_tracker(objv_tracker)
+ {}
+
+ int operate() {
+ reenter(this) {
+ yield {
+ rgw_raw_obj obj{store->get_zone_params().log_pool,
+ RGWMetadataLogHistory::oid};
+ constexpr bool empty_on_enoent = false; // presumably makes a missing object an error instead of an empty result -- confirm
+
+ using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
+ call(new ReadCR(store->get_async_rados(), store, obj,
+ &state, empty_on_enoent, objv_tracker));
+ }
+ if (retcode < 0) { // result of the ReadCR call above
+ ldout(cct, 1) << "failed to read mdlog history: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ *cursor = store->period_history->lookup(state.oldest_realm_epoch); // map the stored epoch onto the period history
+ if (!*cursor) { // lookup failed; the cursor carries the error code
+ return set_cr_error(cursor->get_error());
+ }
+
+ ldout(cct, 10) << "read mdlog history with oldest period id="
+ << state.oldest_period_id << " realm_epoch="
+ << state.oldest_realm_epoch << dendl;
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/// write the given cursor to the mdlog history
+class WriteHistoryCR : public RGWCoroutine {
+ RGWRados *store;
+ Cursor cursor; //< copy of the cursor whose period is recorded
+ RGWObjVersionTracker *objv; //< forwarded to the rados write
+ RGWMetadataLogHistory state; //< history record built from the cursor
+ public:
+ WriteHistoryCR(RGWRados *store, const Cursor& cursor,
+ RGWObjVersionTracker *objv)
+ : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv)
+ {}
+
+ int operate() {
+ reenter(this) {
+ state.oldest_period_id = cursor.get_period().get_id(); // fill the record from the cursor before writing
+ state.oldest_realm_epoch = cursor.get_epoch();
+
+ yield {
+ rgw_raw_obj obj{store->get_zone_params().log_pool,
+ RGWMetadataLogHistory::oid};
+
+ using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
+ call(new WriteCR(store->get_async_rados(), store, obj, state, objv));
+ }
+ if (retcode < 0) { // result of the WriteCR call above
+ ldout(cct, 1) << "failed to write mdlog history: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+
+ ldout(cct, 10) << "wrote mdlog history with oldest period id="
+ << state.oldest_period_id << " realm_epoch="
+ << state.oldest_realm_epoch << dendl;
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/// update the mdlog history to reflect trimmed logs
+class TrimHistoryCR : public RGWCoroutine {
+ RGWRados *store;
+ const Cursor cursor; //< cursor to trimmed period
+ RGWObjVersionTracker *objv; //< to prevent racing updates
+ Cursor next; //< target cursor for oldest log period
+ Cursor existing; //< existing cursor read from disk
+
+ public:
+ TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv)
+ : RGWCoroutine(store->ctx()),
+ store(store), cursor(cursor), objv(objv), next(cursor)
+ {
+ next.next(); // advance past cursor
+ }
+
+ int operate() {
+ reenter(this) {
+ // read an existing history, and write the new history if it's newer
+ yield call(new ReadHistoryCR(store, &existing, objv));
+ if (retcode < 0) { // the read failed; propagate its error unchanged
+ return set_cr_error(retcode);
+ }
+ // reject older trims with ECANCELED
+ if (cursor.get_epoch() < existing.get_epoch()) {
+ ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch()
+ << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
+ return set_cr_error(-ECANCELED);
+ }
+ // overwrite with updated history
+ yield call(new WriteHistoryCR(store, next, objv)); // NOTE(review): next is not checked for validity before being written -- confirm callers never trim the newest period
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
// traverse all the way back to the beginning of the period history, and
// return a cursor to the first period in a fully attached history
Cursor find_oldest_period(RGWRados *store)
@@ -431,7 +534,8 @@ Cursor RGWMetadataManager::init_oldest_log_period()
{
// read the mdlog history
RGWMetadataLogHistory state;
- int ret = read_history(store, &state);
+ RGWObjVersionTracker objv;
+ int ret = read_history(store, &state, &objv);
if (ret == -ENOENT) {
// initialize the mdlog history and write it
@@ -446,7 +550,7 @@ Cursor RGWMetadataManager::init_oldest_log_period()
state.oldest_period_id = cursor.get_period().get_id();
constexpr bool exclusive = true; // don't overwrite
- int ret = write_history(store, state, exclusive);
+ int ret = write_history(store, state, &objv, exclusive);
if (ret < 0 && ret != -EEXIST) {
ldout(cct, 1) << "failed to write mdlog history: "
<< cpp_strerror(ret) << dendl;
@@ -486,7 +590,7 @@ Cursor RGWMetadataManager::init_oldest_log_period()
Cursor RGWMetadataManager::read_oldest_log_period() const
{
RGWMetadataLogHistory state;
- int ret = read_history(store, &state);
+ int ret = read_history(store, &state, nullptr);
if (ret < 0) {
ldout(store->ctx(), 1) << "failed to read mdlog history: "
<< cpp_strerror(ret) << dendl;
@@ -500,6 +604,18 @@ Cursor RGWMetadataManager::read_oldest_log_period() const
return store->period_history->lookup(state.oldest_realm_epoch);
}
+RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period,
+ RGWObjVersionTracker *objv) const
+{
+ return new ReadHistoryCR(store, period, objv); // heap-allocated; lifetime presumably managed by the coroutine framework -- confirm
+}
+
+RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period,
+ RGWObjVersionTracker *objv) const
+{
+ return new TrimHistoryCR(store, period, objv); // heap-allocated; lifetime presumably managed by the coroutine framework -- confirm
+}
+
int RGWMetadataManager::init(const std::string& current_period)
{
// open a log for the current period
diff --git a/src/rgw/rgw_metadata.h b/src/rgw/rgw_metadata.h
index 8b7526399a8..4d077e8f888 100644
--- a/src/rgw/rgw_metadata.h
+++ b/src/rgw/rgw_metadata.h
@@ -18,6 +18,7 @@
class RGWRados;
+class RGWCoroutine;
class JSONObj;
struct RGWObjVersionTracker;
@@ -265,6 +266,27 @@ struct RGWMetadataLogData {
};
WRITE_CLASS_ENCODER(RGWMetadataLogData)
+struct RGWMetadataLogHistory { // record identifying the oldest period of the metadata log history
+ epoch_t oldest_realm_epoch; // NOTE(review): no default initializer; indeterminate until assigned or decoded
+ std::string oldest_period_id;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl); // fields are written in the order decode() reads them
+ ::encode(oldest_realm_epoch, bl);
+ ::encode(oldest_period_id, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::iterator& p) {
+ DECODE_START(1, p);
+ ::decode(oldest_realm_epoch, p);
+ ::decode(oldest_period_id, p);
+ DECODE_FINISH(p);
+ }
+
+ static const std::string oid; // name of the history object; defined out of line in rgw_metadata.cc
+};
+WRITE_CLASS_ENCODER(RGWMetadataLogHistory)
+
class RGWMetadataManager {
map<string, RGWMetadataHandler *> handlers;
CephContext *cct;
@@ -303,6 +325,16 @@ public:
/// period history
RGWPeriodHistory::Cursor read_oldest_log_period() const;
+ /// read the oldest log period asynchronously and write its result to the
+ /// given cursor pointer
+ RGWCoroutine* read_oldest_log_period_cr(RGWPeriodHistory::Cursor *period,
+ RGWObjVersionTracker *objv) const;
+
+ /// try to advance the oldest log period when the given period is trimmed,
+ /// using the given object version tracker to guard against racing updates
+ RGWCoroutine* trim_log_period_cr(RGWPeriodHistory::Cursor period,
+ RGWObjVersionTracker *objv) const;
+
/// find or create the metadata log for the given period
RGWMetadataLog* get_log(const std::string& period);
diff --git a/src/rgw/rgw_object_expirer_core.cc b/src/rgw/rgw_object_expirer_core.cc
index 3bc033f0a68..fb0444186c9 100644
--- a/src/rgw/rgw_object_expirer_core.cc
+++ b/src/rgw/rgw_object_expirer_core.cc
@@ -230,7 +230,7 @@ bool RGWObjectExpirer::inspect_all_shards(const utime_t& last_run,
bool RGWObjectExpirer::going_down()
{
- return (down_flag.read() != 0);
+ return down_flag;
}
void RGWObjectExpirer::start_processor()
@@ -241,7 +241,7 @@ void RGWObjectExpirer::start_processor()
void RGWObjectExpirer::stop_processor()
{
- down_flag.set(1);
+ down_flag = true;
if (worker) {
worker->stop();
worker->join();
diff --git a/src/rgw/rgw_object_expirer_core.h b/src/rgw/rgw_object_expirer_core.h
index 83f0dd30754..6fe8d1410bf 100644
--- a/src/rgw/rgw_object_expirer_core.h
+++ b/src/rgw/rgw_object_expirer_core.h
@@ -8,6 +8,7 @@
#include <iostream>
#include <sstream>
#include <string>
+#include <atomic>
#include "auth/Crypto.h"
@@ -37,6 +38,8 @@
#include "rgw_usage.h"
#include "rgw_replica_log.h"
+#include <atomic>
+
class RGWObjectExpirer {
protected:
RGWRados *store;
@@ -65,7 +68,7 @@ protected:
};
OEWorker *worker;
- atomic_t down_flag;
+ std::atomic<bool> down_flag = { false };
public:
explicit RGWObjectExpirer(RGWRados *_store)
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 465f1e82b2d..6ddd6748a1b 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -3,12 +3,15 @@
#include <errno.h>
#include <stdlib.h>
+#include <system_error>
#include <unistd.h>
#include <sstream>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/optional.hpp>
+#include <boost/utility/in_place_factory.hpp>
+#include <boost/bind.hpp>
#include "common/Clock.h"
#include "common/armor.h"
@@ -43,6 +46,7 @@
#include "compressor/Compressor.h"
+#include "rgw_acl_swift.h"
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rgw
@@ -50,12 +54,21 @@
using namespace std;
using namespace librados;
using ceph::crypto::MD5;
+using boost::optional;
+using boost::none;
+
+using rgw::IAM::ARN;
+using rgw::IAM::Effect;
+using rgw::IAM::Policy;
+using rgw::IAM::Policy;
static string mp_ns = RGW_OBJ_NS_MULTIPART;
static string shadow_ns = RGW_OBJ_NS_SHADOW;
-static int forward_request_to_master(struct req_state *s, obj_version *objv, RGWRados *store, bufferlist& in_data, JSONParser *jp);
+static void forward_req_info(CephContext *cct, req_info& info, const std::string& bucket_name);
+static int forward_request_to_master(struct req_state *s, obj_version *objv, RGWRados *store,
+ bufferlist& in_data, JSONParser *jp, req_info *forward_info = nullptr);
static MultipartMetaFilter mp_filter;
@@ -137,9 +150,9 @@ static int decode_policy(CephContext *cct,
static int get_user_policy_from_attr(CephContext * const cct,
- RGWRados * const store,
- map<string, bufferlist>& attrs,
- RGWAccessControlPolicy& policy /* out */)
+ RGWRados * const store,
+ map<string, bufferlist>& attrs,
+ RGWAccessControlPolicy& policy /* out */)
{
auto aiter = attrs.find(RGW_ATTR_ACL);
if (aiter != attrs.end()) {
@@ -155,11 +168,11 @@ static int get_user_policy_from_attr(CephContext * const cct,
}
static int get_bucket_instance_policy_from_attr(CephContext *cct,
- RGWRados *store,
- RGWBucketInfo& bucket_info,
- map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy,
- rgw_raw_obj& obj)
+ RGWRados *store,
+ RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& bucket_attrs,
+ RGWAccessControlPolicy *policy,
+ rgw_raw_obj& obj)
{
map<string, bufferlist>::iterator aiter = bucket_attrs.find(RGW_ATTR_ACL);
@@ -181,12 +194,12 @@ static int get_bucket_instance_policy_from_attr(CephContext *cct,
}
static int get_obj_policy_from_attr(CephContext *cct,
- RGWRados *store,
- RGWObjectCtx& obj_ctx,
- RGWBucketInfo& bucket_info,
- map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy,
- rgw_obj& obj)
+ RGWRados *store,
+ RGWObjectCtx& obj_ctx,
+ RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& bucket_attrs,
+ RGWAccessControlPolicy *policy,
+ rgw_obj& obj)
{
bufferlist bl;
int ret = 0;
@@ -221,15 +234,27 @@ static int get_obj_policy_from_attr(CephContext *cct,
* Returns: 0 on success, -ERR# otherwise.
*/
static int get_bucket_policy_from_attr(CephContext *cct,
- RGWRados *store,
- RGWBucketInfo& bucket_info,
- map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy)
+ RGWRados *store,
+ RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& bucket_attrs,
+ RGWAccessControlPolicy *policy)
{
rgw_raw_obj instance_obj;
store->get_bucket_instance_obj(bucket_info.bucket, instance_obj);
return get_bucket_instance_policy_from_attr(cct, store, bucket_info, bucket_attrs,
- policy, instance_obj);
+ policy, instance_obj);
+}
+
+/// Build the IAM policy stored under RGW_ATTR_IAM_POLICY in `attrs`.
+///
+/// Returns `none` when the attribute is absent. `store` is not used.
+/// Constructing the Policy may throw (one caller in this file wraps the
+/// call in try/catch), so callers should be prepared for an exception.
+static optional<Policy> get_iam_policy_from_attr(CephContext* cct,
+                                                 RGWRados* store,
+                                                 map<string, bufferlist>& attrs,
+                                                 const string& tenant) {
+  const auto found = attrs.find(RGW_ATTR_IAM_POLICY);
+  if (found == attrs.end()) {
+    return none;
+  }
+  return Policy(cct, tenant, found->second);
+}
static int get_obj_attrs(RGWRados *store, struct req_state *s, rgw_obj& obj, map<string, bufferlist>& attrs)
@@ -238,7 +263,6 @@ static int get_obj_attrs(RGWRados *store, struct req_state *s, rgw_obj& obj, map
RGWRados::Object::Read read_op(&op_target);
read_op.params.attrs = &attrs;
- read_op.params.perr = &s->err;
return read_op.prepare();
}
@@ -250,7 +274,6 @@ static int modify_obj_attr(RGWRados *store, struct req_state *s, rgw_obj& obj, c
RGWRados::Object::Read read_op(&op_target);
read_op.params.attrs = &attrs;
- read_op.params.perr = &s->err;
int r = read_op.prepare();
if (r < 0) {
@@ -302,7 +325,8 @@ static int read_obj_policy(RGWRados *store,
struct req_state *s,
RGWBucketInfo& bucket_info,
map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy,
+ RGWAccessControlPolicy* acl,
+ optional<Policy>& policy,
rgw_bucket& bucket,
rgw_obj_key& object)
{
@@ -324,9 +348,11 @@ static int read_obj_policy(RGWRados *store,
} else {
obj = rgw_obj(bucket, object);
}
+ policy = get_iam_policy_from_attr(s->cct, store, bucket_attrs, bucket.tenant);
+
RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
int ret = get_obj_policy_from_attr(s->cct, store, *obj_ctx,
- bucket_info, bucket_attrs, policy, obj);
+ bucket_info, bucket_attrs, acl, obj);
if (ret == -ENOENT) {
/* object does not exist checking the bucket's ACL to make sure
that we send a proper error code */
@@ -361,7 +387,7 @@ int rgw_build_bucket_policies(RGWRados* store, struct req_state* s)
int ret = 0;
rgw_obj_key obj;
RGWUserInfo bucket_owner_info;
- RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+ RGWObjectCtx obj_ctx(store);
string bi = s->info.args.get(RGW_SYS_PARAM_PREFIX "bucket-instance");
if (!bi.empty()) {
@@ -374,8 +400,13 @@ int rgw_build_bucket_policies(RGWRados* store, struct req_state* s)
if(s->dialect.compare("s3") == 0) {
s->bucket_acl = new RGWAccessControlPolicy_S3(s->cct);
} else if(s->dialect.compare("swift") == 0) {
- s->user_acl = std::unique_ptr<RGWAccessControlPolicy>(
- new RGWAccessControlPolicy_SWIFTAcct(s->cct));
+ /* We aren't allocating the account policy for those operations using
+ * the Swift's infrastructure that don't really need req_state::user.
+ * Typical example here is the implementation of /info. */
+ if (!s->user->user_id.empty()) {
+ s->user_acl = std::unique_ptr<RGWAccessControlPolicy>(
+ new RGWAccessControlPolicy_SWIFTAcct(s->cct));
+ }
s->bucket_acl = new RGWAccessControlPolicy_SWIFT(s->cct);
} else {
s->bucket_acl = new RGWAccessControlPolicy(s->cct);
@@ -497,6 +528,16 @@ int rgw_build_bucket_policies(RGWRados* store, struct req_state* s)
}
}
+ try {
+ s->iam_policy = get_iam_policy_from_attr(s->cct, store, s->bucket_attrs,
+ s->bucket_tenant);
+ } catch (const std::exception& e) {
+ // Really this is a can't happen condition. We parse the policy
+ // when it's given to us, so perhaps we should abort or otherwise
+ // raise bloody murder.
+ lderr(s->cct) << "Error reading IAM Policy: " << e.what() << dendl;
+ ret = -EACCES;
+ }
return ret;
}
@@ -524,12 +565,72 @@ int rgw_build_object_policies(RGWRados *store, struct req_state *s,
if (prefetch_data) {
store->set_prefetch_data(s->obj_ctx, obj);
}
- ret = read_obj_policy(store, s, s->bucket_info, s->bucket_attrs, s->object_acl, s->bucket, s->object);
+ ret = read_obj_policy(store, s, s->bucket_info, s->bucket_attrs, s->object_acl, s->iam_policy, s->bucket, s->object);
}
return ret;
}
+/// Build the IAM condition environment (condition key -> value) for the
+/// request described by `s`.
+///
+/// aws:CurrentTime, aws:EpochTime and aws:PrincipalType are always set; the
+/// remaining keys are added only when the matching request variable (or
+/// `s->user`) is present. `store` is not used.
+rgw::IAM::Environment rgw_build_iam_environment(RGWRados* store,
+                                                struct req_state* s)
+{
+  rgw::IAM::Environment e;
+  const auto& m = s->info.env->get_map();
+
+  auto add = [&e](const char* key, const std::string& value) {
+    e.emplace(std::piecewise_construct,
+              std::forward_as_tuple(key),
+              std::forward_as_tuple(value));
+  };
+  // copy a request variable into the environment when it is present
+  auto add_from_request = [&m, &add](const char* var, const char* key) {
+    auto i = m.find(var);
+    if (i != m.end()) {
+      add(key, i->second);
+    }
+  };
+
+  // aws:CurrentTime is an ISO 8601 date-time and aws:EpochTime is seconds
+  // since the epoch; the two values were previously swapped.
+  const auto now = ceph::real_clock::now();
+  add("aws:CurrentTime", ceph::to_iso_8601(now));
+  add("aws:EpochTime", std::to_string(ceph::real_clock::to_time_t(now)));
+
+  // TODO: This is fine for now, but once we have STS we'll need to
+  // look and see. Also this won't work with the IdentityApplier
+  // model, since we need to know the actual credential.
+  add("aws:PrincipalType", "User");
+
+  add_from_request("HTTP_REFERER", "aws:Referer");
+
+  // These seem to be the semantics, judging from rest_rgw_s3.cc:
+  // the mere presence of SERVER_PORT_SECURE marks the transport as secure.
+  if (m.find("SERVER_PORT_SECURE") != m.end()) {
+    add("aws:SecureTransport", "true");
+  }
+
+  // aws:SourceIp is the requester's address. HTTP_HOST is the Host request
+  // header (the server name), so it was the wrong source for this key.
+  // NOTE(review): assumes every frontend populates REMOTE_ADDR -- confirm.
+  add_from_request("REMOTE_ADDR", "aws:SourceIp");
+
+  add_from_request("HTTP_USER_AGENT", "aws:UserAgent");
+
+  if (s->user) {
+    // What to do about aws::userid? One can have multiple access
+    // keys so that isn't really suitable. Do we have a durable
+    // identifier that can persist through name changes?
+    add("aws:username", s->user->user_id.id);
+  }
+  return e;
+}
+
static void rgw_bucket_object_pre_exec(struct req_state *s)
{
if (s->expect_cont)
@@ -546,7 +647,21 @@ int RGWGetObj::verify_permission()
store->set_prefetch_data(s->obj_ctx, obj);
}
- if (!verify_object_permission(s, RGW_PERM_READ)) {
+ if (torrent.get_flag()) {
+ if (obj.key.instance.empty()) {
+ action = rgw::IAM::s3GetObjectTorrent;
+ } else {
+ action = rgw::IAM::s3GetObjectVersionTorrent;
+ }
+ } else {
+ if (obj.key.instance.empty()) {
+ action = rgw::IAM::s3GetObject;
+ } else {
+ action = rgw::IAM::s3GetObjectVersion;
+ }
+ }
+
+ if (!verify_object_permission(s, action)) {
return -EACCES;
}
@@ -786,7 +901,8 @@ bool RGWOp::generate_cors_headers(string& origin, string& method, string& header
int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy * const bucket_policy,
+ RGWAccessControlPolicy * const bucket_acl,
+ const optional<Policy>& bucket_policy,
const off_t start_ofs,
const off_t end_ofs)
{
@@ -817,7 +933,6 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket,
read_op.conds.if_match = ent.meta.etag.c_str();
read_op.params.attrs = &attrs;
read_op.params.obj_size = &obj_size;
- read_op.params.perr = &s->err;
op_ret = read_op.prepare();
if (op_ret < 0)
@@ -862,8 +977,8 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket,
ldout(s->cct, 2) << "overriding permissions due to system operation" << dendl;
} else if (s->auth.identity->is_admin_of(s->user->user_id)) {
ldout(s->cct, 2) << "overriding permissions due to admin operation" << dendl;
- } else if (!verify_object_permission(s, s->user_acl.get(), bucket_policy,
- &obj_policy, RGW_PERM_READ)) {
+ } else if (!verify_object_permission(s, part, s->user_acl.get(), bucket_acl,
+ &obj_policy, bucket_policy, action)) {
return -EPERM;
}
@@ -885,13 +1000,15 @@ static int iterate_user_manifest_parts(CephContext * const cct,
const off_t end,
RGWBucketInfo *pbucket_info,
const string& obj_prefix,
- RGWAccessControlPolicy * const bucket_policy,
+ RGWAccessControlPolicy * const bucket_acl,
+ const optional<Policy>& bucket_policy,
uint64_t * const ptotal_len,
uint64_t * const pobj_size,
string * const pobj_sum,
int (*cb)(rgw_bucket& bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy * const bucket_policy,
+ RGWAccessControlPolicy * const bucket_acl,
+ const optional<Policy>& bucket_policy,
off_t start_ofs,
off_t end_ofs,
void *param),
@@ -947,7 +1064,7 @@ static int iterate_user_manifest_parts(CephContext * const cct,
len_count += end_ofs - start_ofs;
if (cb) {
- r = cb(bucket, ent, bucket_policy, start_ofs, end_ofs, cb_param);
+ r = cb(bucket, ent, bucket_acl, bucket_policy, start_ofs, end_ofs, cb_param);
if (r < 0) {
return r;
}
@@ -973,13 +1090,12 @@ static int iterate_user_manifest_parts(CephContext * const cct,
}
struct rgw_slo_part {
- RGWAccessControlPolicy *bucket_policy;
+ RGWAccessControlPolicy *bucket_acl = nullptr;
+ Policy* bucket_policy = nullptr;
rgw_bucket bucket;
string obj_name;
- uint64_t size;
+ uint64_t size = 0;
string etag;
-
- rgw_slo_part() : bucket_policy(NULL), size(0) {}
};
static int iterate_slo_parts(CephContext *cct,
@@ -989,7 +1105,8 @@ static int iterate_slo_parts(CephContext *cct,
map<uint64_t, rgw_slo_part>& slo_parts,
int (*cb)(rgw_bucket& bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy *bucket_policy,
+ RGWAccessControlPolicy *bucket_acl,
+ const optional<Policy>& bucket_policy,
off_t start_ofs,
off_t end_ofs,
void *param),
@@ -1038,8 +1155,12 @@ static int iterate_slo_parts(CephContext *cct,
if (found_start) {
if (cb) {
- int r = cb(part.bucket, ent, part.bucket_policy, start_ofs, end_ofs, cb_param);
- if (r < 0)
+ // SLO is a Swift thing, and Swift has no knowledge of S3 Policies.
+ int r = cb(part.bucket, ent, part.bucket_acl,
+ (part.bucket_policy ?
+ optional<Policy>(*part.bucket_policy) : none),
+ start_ofs, end_ofs, cb_param);
+ if (r < 0)
return r;
}
}
@@ -1052,13 +1173,14 @@ static int iterate_slo_parts(CephContext *cct,
static int get_obj_user_manifest_iterate_cb(rgw_bucket& bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy * const bucket_policy,
+ RGWAccessControlPolicy * const bucket_acl,
+ const optional<Policy>& bucket_policy,
const off_t start_ofs,
const off_t end_ofs,
void * const param)
{
RGWGetObj *op = static_cast<RGWGetObj *>(param);
- return op->read_user_manifest_part(bucket, ent, bucket_policy, start_ofs, end_ofs);
+ return op->read_user_manifest_part(bucket, ent, bucket_acl, bucket_policy, start_ofs, end_ofs);
}
int RGWGetObj::handle_user_manifest(const char *prefix)
@@ -1080,8 +1202,10 @@ int RGWGetObj::handle_user_manifest(const char *prefix)
rgw_bucket bucket;
- RGWAccessControlPolicy _bucket_policy(s->cct);
- RGWAccessControlPolicy *bucket_policy;
+ RGWAccessControlPolicy _bucket_acl(s->cct);
+ RGWAccessControlPolicy *bucket_acl;
+ optional<Policy> _bucket_policy;
+ optional<Policy>* bucket_policy;
RGWBucketInfo bucket_info;
RGWBucketInfo *pbucket_info;
@@ -1098,16 +1222,20 @@ int RGWGetObj::handle_user_manifest(const char *prefix)
}
bucket = bucket_info.bucket;
pbucket_info = &bucket_info;
- bucket_policy = &_bucket_policy;
- r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_policy, bucket);
+ bucket_acl = &_bucket_acl;
+ r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_acl, bucket);
if (r < 0) {
ldout(s->cct, 0) << "failed to read bucket policy" << dendl;
return r;
}
+ _bucket_policy = get_iam_policy_from_attr(s->cct, store, bucket_attrs,
+ bucket_info.bucket.tenant);
+ bucket_policy = &_bucket_policy;
} else {
bucket = s->bucket;
pbucket_info = &s->bucket_info;
- bucket_policy = s->bucket_acl;
+ bucket_acl = s->bucket_acl;
+ bucket_policy = &s->iam_policy;
}
/* dry run to find out:
@@ -1115,7 +1243,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix)
* - overall DLO's content size,
* - md5 sum of overall DLO's content (for etag of Swift API). */
int r = iterate_user_manifest_parts(s->cct, store, ofs, end,
- pbucket_info, obj_prefix, bucket_policy,
+ pbucket_info, obj_prefix, bucket_acl, *bucket_policy,
&total_len, &s->obj_size, &lo_etag,
nullptr /* cb */, nullptr /* cb arg */);
if (r < 0) {
@@ -1129,7 +1257,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix)
}
r = iterate_user_manifest_parts(s->cct, store, ofs, end,
- pbucket_info, obj_prefix, bucket_policy,
+ pbucket_info, obj_prefix, bucket_acl, *bucket_policy,
nullptr, nullptr, nullptr,
get_obj_user_manifest_iterate_cb, (void *)this);
if (r < 0) {
@@ -1156,8 +1284,8 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl)
}
ldout(s->cct, 2) << "RGWGetObj::handle_slo_manifest()" << dendl;
- list<RGWAccessControlPolicy> allocated_policies;
- map<string, RGWAccessControlPolicy *> policies;
+ list<RGWAccessControlPolicy> allocated_acls; // list, not vector: pointers to elements are kept in `policies` and the SLO parts, and must survive later push_back calls
+ map<string, pair<RGWAccessControlPolicy *, optional<Policy>>> policies;
map<string, rgw_bucket> buckets;
map<uint64_t, rgw_slo_part> slo_parts;
@@ -1189,16 +1317,18 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl)
string obj_name = path.substr(pos_sep + 1);
rgw_bucket bucket;
- RGWAccessControlPolicy *bucket_policy;
+ RGWAccessControlPolicy *bucket_acl;
+ Policy* bucket_policy;
if (bucket_name.compare(s->bucket.name) != 0) {
const auto& piter = policies.find(bucket_name);
if (piter != policies.end()) {
- bucket_policy = piter->second;
- bucket = buckets[bucket_name];
+ bucket_acl = piter->second.first;
+ bucket_policy = piter->second.second.get_ptr();
+ bucket = buckets[bucket_name];
} else {
- allocated_policies.push_back(RGWAccessControlPolicy(s->cct));
- RGWAccessControlPolicy& _bucket_policy = allocated_policies.back();
+ allocated_acls.push_back(RGWAccessControlPolicy(s->cct));
+ RGWAccessControlPolicy& _bucket_acl = allocated_acls.back();
RGWBucketInfo bucket_info;
map<string, bufferlist> bucket_attrs;
@@ -1212,23 +1342,28 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl)
return r;
}
bucket = bucket_info.bucket;
- bucket_policy = &_bucket_policy;
- r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_policy,
+ bucket_acl = &_bucket_acl;
+ r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_acl,
bucket);
if (r < 0) {
- ldout(s->cct, 0) << "failed to read bucket policy for bucket "
+ ldout(s->cct, 0) << "failed to read bucket ACL for bucket "
<< bucket << dendl;
return r;
- }
- buckets[bucket_name] = bucket;
- policies[bucket_name] = bucket_policy;
+ }
+ auto _bucket_policy = get_iam_policy_from_attr(
+ s->cct, store, bucket_attrs, bucket_info.bucket.tenant);
+ bucket_policy = _bucket_policy.get_ptr();
+ buckets[bucket_name] = bucket;
+ policies[bucket_name] = make_pair(bucket_acl, _bucket_policy);
}
} else {
bucket = s->bucket;
- bucket_policy = s->bucket_acl;
+ bucket_acl = s->bucket_acl;
+ bucket_policy = s->iam_policy.get_ptr();
}
rgw_slo_part part;
+ part.bucket_acl = bucket_acl;
part.bucket_policy = bucket_policy;
part.bucket = bucket;
part.obj_name = obj_name;
@@ -1377,7 +1512,6 @@ void RGWGetObj::execute()
read_op.params.attrs = &attrs;
read_op.params.lastmod = &lastmod;
read_op.params.obj_size = &s->obj_size;
- read_op.params.perr = &s->err;
op_ret = read_op.prepare();
if (op_ret < 0)
@@ -1871,7 +2005,8 @@ void RGWDeleteBucketWebsite::execute()
int RGWStatBucket::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_READ)) {
+ // This (a HEAD request on a bucket) is governed by the s3:ListBucket permission.
+ if (!verify_bucket_permission(s, rgw::IAM::s3ListBucket)) {
return -EACCES;
}
@@ -1910,7 +2045,15 @@ void RGWStatBucket::execute()
int RGWListBucket::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_READ)) {
+ op_ret = get_params();
+ if (op_ret < 0) {
+ return op_ret;
+ }
+
+ if (!verify_bucket_permission(s,
+ list_versions ?
+ rgw::IAM::s3ListBucketVersions :
+ rgw::IAM::s3ListBucket)) {
return -EACCES;
}
@@ -1948,10 +2091,6 @@ void RGWListBucket::execute()
return;
}
- op_ret = get_params();
- if (op_ret < 0)
- return;
-
if (need_container_stats()) {
map<string, RGWBucketEnt> m;
m[s->bucket.name] = RGWBucketEnt();
@@ -2043,7 +2182,7 @@ int RGWCreateBucket::verify_permission()
static int forward_request_to_master(struct req_state *s, obj_version *objv,
RGWRados *store, bufferlist& in_data,
- JSONParser *jp)
+ JSONParser *jp, req_info *forward_info)
{
if (!store->rest_master_conn) {
ldout(s->cct, 0) << "rest connection is invalid" << dendl;
@@ -2053,9 +2192,8 @@ static int forward_request_to_master(struct req_state *s, obj_version *objv,
bufferlist response;
string uid_str = s->user->user_id.to_str();
#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
- int ret = store->rest_master_conn->forward(uid_str, s->info, objv,
- MAX_REST_RESPONSE, &in_data,
- &response);
+ int ret = store->rest_master_conn->forward(uid_str, (forward_info ? *forward_info : s->info),
+ objv, MAX_REST_RESPONSE, &in_data, &response);
if (ret < 0)
return ret;
@@ -2327,7 +2465,7 @@ void RGWCreateBucket::execute()
bucket.name = s->bucket_name;
op_ret = store->select_bucket_placement(*(s->user), zonegroup_id,
placement_rule,
- bucket, &selected_placement_rule, nullptr);
+ &selected_placement_rule, nullptr);
if (selected_placement_rule != s->bucket_info.placement_rule) {
op_ret = -EEXIST;
return;
@@ -2475,7 +2613,7 @@ void RGWCreateBucket::execute()
int RGWDeleteBucket::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_WRITE)) {
+ if (!verify_bucket_permission(s, rgw::IAM::s3DeleteBucket)) {
return -EACCES;
}
@@ -2572,7 +2710,8 @@ int RGWPutObj::verify_permission()
{
if (copy_source) {
- RGWAccessControlPolicy cs_policy(s->cct);
+ RGWAccessControlPolicy cs_acl(s->cct);
+ optional<Policy> policy;
map<string, bufferlist> cs_attrs;
rgw_bucket cs_bucket(copy_source_bucket_info.bucket);
rgw_obj_key cs_object(copy_source_object_name, copy_source_version_id);
@@ -2582,19 +2721,45 @@ int RGWPutObj::verify_permission()
store->set_prefetch_data(s->obj_ctx, obj);
/* check source object permissions */
- if (read_obj_policy(store, s, copy_source_bucket_info, cs_attrs, &cs_policy, cs_bucket, cs_object) < 0) {
+ if (read_obj_policy(store, s, copy_source_bucket_info, cs_attrs, &cs_acl, policy,
+ cs_bucket, cs_object) < 0) {
return -EACCES;
}
/* admin request overrides permission checks */
- if (! s->auth.identity->is_admin_of(cs_policy.get_owner().get_id()) &&
- ! cs_policy.verify_permission(*s->auth.identity, s->perm_mask, RGW_PERM_READ)) {
- return -EACCES;
+ if (! s->auth.identity->is_admin_of(cs_acl.get_owner().get_id())) {
+ if (policy) {
+ auto e = policy->eval(s->env, *s->auth.identity,
+ cs_object.instance.empty() ?
+ rgw::IAM::s3GetObject :
+ rgw::IAM::s3GetObjectVersion,
+ rgw::IAM::ARN(obj));
+ if (e == Effect::Deny) {
+ return -EACCES;
+ } else if (e == Effect::Pass &&
+ !cs_acl.verify_permission(*s->auth.identity, s->perm_mask,
+ RGW_PERM_READ)) {
+ return -EACCES;
+ }
+ } else if (!cs_acl.verify_permission(*s->auth.identity, s->perm_mask,
+ RGW_PERM_READ)) {
+ return -EACCES;
+ }
}
+ }
+ if (s->iam_policy) {
+ auto e = s->iam_policy->eval(s->env, *s->auth.identity,
+ rgw::IAM::s3PutObject,
+ rgw_obj(s->bucket, s->object));
+ if (e == Effect::Allow) {
+ return 0;
+ } else if (e == Effect::Deny) {
+ return -EACCES;
+ }
}
- if (!verify_bucket_permission(s, RGW_PERM_WRITE)) {
+ if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
return -EACCES;
}
@@ -2667,7 +2832,7 @@ int RGWPutObjProcessor_Multipart::do_complete(size_t accounted_size,
map<string, bufferlist>& attrs,
real_time delete_at,
const char *if_match,
- const char *if_nomatch)
+ const char *if_nomatch, const string *user_data)
{
complete_writing_data();
@@ -3027,7 +3192,7 @@ void RGWPutObj::execute()
}
bufferlist &data = data_in;
- if (s->aws4_auth_streaming_mode) {
+ if (len && s->aws4_auth_streaming_mode) {
/* use unwrapped data */
data = s->aws4_auth->bl;
len = data.length();
@@ -3221,7 +3386,8 @@ void RGWPutObj::execute()
}
op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
- (delete_at ? *delete_at : real_time()), if_match, if_nomatch);
+ (delete_at ? *delete_at : real_time()), if_match, if_nomatch,
+ (user_data.empty() ? nullptr : &user_data));
/* produce torrent */
if (s->cct->_conf->rgw_torrent_flag && (ofs == torrent.get_data_len()))
@@ -3271,16 +3437,10 @@ void RGWPostObj::pre_exec()
void RGWPostObj::execute()
{
RGWPutObjDataProcessor *filter = nullptr;
- std::unique_ptr<RGWPutObjDataProcessor> encrypt;
- char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
- unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
- MD5 hash;
- buffer::list bl, aclbl;
- int len = 0;
boost::optional<RGWPutObj_Compress> compressor;
CompressorRef plugin;
- // read in the data from the POST form
+ /* Read in the data from the POST form. */
op_ret = get_params();
if (op_ret < 0) {
return;
@@ -3291,121 +3451,151 @@ void RGWPostObj::execute()
return;
}
- if (!verify_bucket_permission(s, RGW_PERM_WRITE)) {
+ if (s->iam_policy) {
+ auto e = s->iam_policy->eval(s->env, *s->auth.identity,
+ rgw::IAM::s3PutObject,
+ rgw_obj(s->bucket, s->object));
+ if (e == Effect::Deny) {
+ op_ret = -EACCES;
+ return;
+ } else if (e == Effect::Pass && !verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
+ op_ret = -EACCES;
+ return;
+ }
+ } else if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
op_ret = -EACCES;
return;
}
- op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
- user_quota, bucket_quota, s->content_length);
- if (op_ret < 0) {
- return;
- }
-
- RGWPutObjProcessor_Atomic processor(*static_cast<RGWObjectCtx *>(s->obj_ctx),
- s->bucket_info,
- s->bucket,
- s->object.name,
- /* part size */
- s->cct->_conf->rgw_obj_stripe_size,
- s->req_id,
- s->bucket_info.versioning_enabled());
+ /* Start iteration over data fields. It's necessary as Swift's FormPost
+ * is capable to handle multiple files in single form. */
+ do {
+ std::unique_ptr<RGWPutObjDataProcessor> encrypt;
+ char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+ unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ MD5 hash;
+ ceph::buffer::list bl, aclbl;
+ int len = 0;
+
+ op_ret = store->check_quota(s->bucket_owner.get_id(),
+ s->bucket,
+ user_quota,
+ bucket_quota,
+ s->content_length);
+ if (op_ret < 0) {
+ return;
+ }
- // no filters by default
- filter = &processor;
+ RGWPutObjProcessor_Atomic processor(*static_cast<RGWObjectCtx *>(s->obj_ctx),
+ s->bucket_info,
+ s->bucket,
+ get_current_filename(),
+ /* part size */
+ s->cct->_conf->rgw_obj_stripe_size,
+ s->req_id,
+ s->bucket_info.versioning_enabled());
+ /* No filters by default. */
+ filter = &processor;
- op_ret = processor.prepare(store, nullptr);
- if (op_ret < 0)
- return;
+ op_ret = processor.prepare(store, nullptr);
+ if (op_ret < 0) {
+ return;
+ }
- op_ret = get_encrypt_filter(&encrypt, filter);
- if (op_ret < 0) {
- return;
- }
- if (encrypt != nullptr) {
- filter = encrypt.get();
- } else {
- const auto& compression_type = store->get_zone_params().get_compression_type(
- s->bucket_info.placement_rule);
- if (compression_type != "none") {
- plugin = Compressor::create(s->cct, compression_type);
- if (!plugin) {
- ldout(s->cct, 1) << "Cannot load plugin for compression type "
- << compression_type << dendl;
- } else {
- compressor.emplace(s->cct, plugin, filter);
- filter = &*compressor;
+ op_ret = get_encrypt_filter(&encrypt, filter);
+ if (op_ret < 0) {
+ return;
+ }
+ if (encrypt != nullptr) {
+ filter = encrypt.get();
+ } else {
+ const auto& compression_type = store->get_zone_params().get_compression_type(
+ s->bucket_info.placement_rule);
+ if (compression_type != "none") {
+ plugin = Compressor::create(s->cct, compression_type);
+ if (!plugin) {
+ ldout(s->cct, 1) << "Cannot load plugin for compression type "
+ << compression_type << dendl;
+ } else {
+ compressor.emplace(s->cct, plugin, filter);
+ filter = &*compressor;
+ }
}
}
- }
- while (data_pending) {
- bufferlist data;
- len = get_data(data);
+ bool again;
+ do {
+ ceph::bufferlist data;
+ len = get_data(data, again);
- if (len < 0) {
- op_ret = len;
- return;
- }
+ if (len < 0) {
+ op_ret = len;
+ return;
+ }
- if (!len)
- break;
+ if (!len) {
+ break;
+ }
- hash.Update((const byte *)data.c_str(), data.length());
- op_ret = put_data_and_throttle(filter, data, ofs, false);
+ hash.Update((const byte *)data.c_str(), data.length());
+ op_ret = put_data_and_throttle(filter, data, ofs, false);
- ofs += len;
+ ofs += len;
- if (ofs > max_len) {
- op_ret = -ERR_TOO_LARGE;
- return;
- }
- }
- {
- bufferlist flush;
- op_ret = put_data_and_throttle(filter, flush, ofs, false);
- }
- if (len < min_len) {
- op_ret = -ERR_TOO_SMALL;
- return;
- }
+ if (ofs > max_len) {
+ op_ret = -ERR_TOO_LARGE;
+ return;
+ }
+ } while (again);
- s->obj_size = ofs;
+ {
+ bufferlist flush;
+ op_ret = put_data_and_throttle(filter, flush, ofs, false);
+ }
- op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
- user_quota, bucket_quota, s->obj_size);
- if (op_ret < 0) {
- return;
- }
+ if (len < min_len) {
+ op_ret = -ERR_TOO_SMALL;
+ return;
+ }
- hash.Final(m);
- buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+ s->obj_size = ofs;
- etag = calc_md5;
- bl.append(etag.c_str(), etag.size() + 1);
- emplace_attr(RGW_ATTR_ETAG, std::move(bl));
+ op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+ user_quota, bucket_quota, s->obj_size);
+ if (op_ret < 0) {
+ return;
+ }
- policy.encode(aclbl);
- emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
+ hash.Final(m);
+ buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
- if (content_type.size()) {
- bufferlist ct_bl;
- ct_bl.append(content_type.c_str(), content_type.size() + 1);
- emplace_attr(RGW_ATTR_CONTENT_TYPE, std::move(ct_bl));
- }
+ etag = calc_md5;
+ bl.append(etag.c_str(), etag.size() + 1);
+ emplace_attr(RGW_ATTR_ETAG, std::move(bl));
- if (compressor && compressor->is_compressed()) {
- bufferlist tmp;
- RGWCompressionInfo cs_info;
- cs_info.compression_type = plugin->get_type_name();
- cs_info.orig_size = s->obj_size;
- cs_info.blocks = move(compressor->get_compression_blocks());
- ::encode(cs_info, tmp);
- emplace_attr(RGW_ATTR_COMPRESSION, std::move(tmp));
- }
+ policy.encode(aclbl);
+ emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
+
+ const std::string content_type = get_current_content_type();
+ if (! content_type.empty()) {
+ ceph::bufferlist ct_bl;
+ ct_bl.append(content_type.c_str(), content_type.size() + 1);
+ emplace_attr(RGW_ATTR_CONTENT_TYPE, std::move(ct_bl));
+ }
+
+ if (compressor && compressor->is_compressed()) {
+ ceph::bufferlist tmp;
+ RGWCompressionInfo cs_info;
+ cs_info.compression_type = plugin->get_type_name();
+ cs_info.orig_size = s->obj_size;
+ cs_info.blocks = move(compressor->get_compression_blocks());
+ ::encode(cs_info, tmp);
+ emplace_attr(RGW_ATTR_COMPRESSION, std::move(tmp));
+ }
- op_ret = processor.complete(s->obj_size, etag, NULL, real_time(), attrs,
- (delete_at ? *delete_at : real_time()));
+ op_ret = processor.complete(s->obj_size, etag, nullptr, real_time(),
+ attrs, (delete_at ? *delete_at : real_time()));
+ } while (is_next_file_to_upload());
}
@@ -3538,7 +3728,7 @@ void RGWPutMetadataAccount::execute()
int RGWPutMetadataBucket::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_WRITE)) {
+ if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
return -EACCES;
}
@@ -3569,6 +3759,12 @@ void RGWPutMetadataBucket::execute()
* the hood. This method will add the new items only if the map doesn't
* contain such keys yet. */
if (has_policy) {
+ if (s->dialect.compare("swift") == 0) {
+ auto old_policy = static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl);
+ auto new_policy = static_cast<RGWAccessControlPolicy_SWIFT*>(&policy);
+ new_policy->filter_merge(policy_rw_mask, old_policy);
+ policy = *new_policy;
+ }
buffer::list bl;
policy.encode(bl);
emplace_attr(RGW_ATTR_ACL, std::move(bl));
@@ -3611,7 +3807,9 @@ void RGWPutMetadataBucket::execute()
int RGWPutMetadataObject::verify_permission()
{
- if (!verify_object_permission(s, RGW_PERM_WRITE)) {
+ // This looks to be something specific to Swift. We could add
+ // operations like swift:PutMetadataObject to the Policy Engine.
+ if (!verify_object_permission_no_policy(s, RGW_PERM_WRITE)) {
return -EACCES;
}
@@ -3722,7 +3920,19 @@ int RGWDeleteObj::handle_slo_manifest(bufferlist& bl)
int RGWDeleteObj::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_WRITE)) {
+ if (s->iam_policy) { /* bucket policy, when present, is consulted before the ACL */
+ auto r = s->iam_policy->eval(s->env, *s->auth.identity,
+ s->object.instance.empty() ?
+ rgw::IAM::s3DeleteObject :
+ rgw::IAM::s3DeleteObjectVersion,
+ ARN(s->bucket, s->object.name));
+ if (r == Effect::Allow)
+ return 0; /* this function returns an errno-style int: 0 means permitted */
+ else if (r == Effect::Deny)
+ return -EACCES; /* explicit Deny must fail; `false` would read as 0 == success */
+ }
+
+ if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
return -EACCES;
}
@@ -3872,7 +4082,8 @@ bool RGWCopyObj::parse_copy_location(const string& url_src, string& bucket_name,
int RGWCopyObj::verify_permission()
{
- RGWAccessControlPolicy src_policy(s->cct);
+ RGWAccessControlPolicy src_acl(s->cct);
+ optional<Policy> src_policy;
op_ret = get_params();
if (op_ret < 0)
return op_ret;
@@ -3907,17 +4118,32 @@ int RGWCopyObj::verify_permission()
store->set_prefetch_data(s->obj_ctx, src_obj);
/* check source object permissions */
- op_ret = read_obj_policy(store, s, src_bucket_info, src_attrs, &src_policy,
- src_bucket, src_object);
+ op_ret = read_obj_policy(store, s, src_bucket_info, src_attrs, &src_acl,
+ src_policy, src_bucket, src_object);
if (op_ret < 0) {
return op_ret;
}
/* admin request overrides permission checks */
- if (! s->auth.identity->is_admin_of(src_policy.get_owner().get_id()) &&
- ! src_policy.verify_permission(*s->auth.identity, s->perm_mask,
- RGW_PERM_READ)) {
- return -EACCES;
+ if (!s->auth.identity->is_admin_of(src_acl.get_owner().get_id())) {
+ if (src_policy) {
+ auto e = src_policy->eval(s->env, *s->auth.identity,
+ src_object.instance.empty() ?
+ rgw::IAM::s3GetObject :
+ rgw::IAM::s3GetObjectVersion,
+ ARN(src_obj));
+ if (e == Effect::Deny) {
+ return -EACCES;
+ } else if (e == Effect::Pass &&
+ !src_acl.verify_permission(*s->auth.identity, s->perm_mask,
+ RGW_PERM_READ)) {
+ return -EACCES;
+ }
+ } else if (!src_acl.verify_permission(*s->auth.identity,
+ s->perm_mask,
+ RGW_PERM_READ)) {
+ return -EACCES;
+ }
}
}
@@ -4070,7 +4296,6 @@ void RGWCopyObj::execute()
(version_id.empty() ? NULL : &version_id),
&s->req_id, /* use req_id as tag */
&etag,
- &s->err,
copy_obj_progress_cb, (void *)this
);
}
@@ -4079,9 +4304,12 @@ int RGWGetACLs::verify_permission()
{
bool perm;
if (!s->object.empty()) {
- perm = verify_object_permission(s, RGW_PERM_READ_ACP);
+ perm = verify_object_permission(s,
+ s->object.instance.empty() ?
+ rgw::IAM::s3GetObjectAcl :
+ rgw::IAM::s3GetObjectVersionAcl);
} else {
- perm = verify_bucket_permission(s, RGW_PERM_READ_ACP);
+ perm = verify_bucket_permission(s, rgw::IAM::s3GetBucketAcl); /* bucket ACL read is a bucket action, not s3GetObjectAcl */
}
if (!perm)
return -EACCES;
@@ -4109,9 +4337,12 @@ int RGWPutACLs::verify_permission()
{
bool perm;
if (!s->object.empty()) {
- perm = verify_object_permission(s, RGW_PERM_WRITE_ACP);
+ perm = verify_object_permission(s,
+ s->object.instance.empty() ?
+ rgw::IAM::s3PutObjectAcl :
+ rgw::IAM::s3PutObjectVersionAcl);
} else {
- perm = verify_bucket_permission(s, RGW_PERM_WRITE_ACP);
+ perm = verify_bucket_permission(s, rgw::IAM::s3PutBucketAcl);
}
if (!perm)
return -EACCES;
@@ -4122,7 +4353,7 @@ int RGWPutACLs::verify_permission()
int RGWGetLC::verify_permission()
{
bool perm;
- perm = verify_bucket_permission(s, RGW_PERM_READ_ACP);
+ perm = verify_bucket_permission(s, rgw::IAM::s3GetLifecycleConfiguration);
if (!perm)
return -EACCES;
@@ -4132,7 +4363,7 @@ int RGWGetLC::verify_permission()
int RGWPutLC::verify_permission()
{
bool perm;
- perm = verify_bucket_permission(s, RGW_PERM_WRITE_ACP);
+ perm = verify_bucket_permission(s, rgw::IAM::s3PutLifecycleConfiguration);
if (!perm)
return -EACCES;
@@ -4142,7 +4373,7 @@ int RGWPutLC::verify_permission()
int RGWDeleteLC::verify_permission()
{
bool perm;
- perm = verify_bucket_permission(s, RGW_PERM_WRITE_ACP);
+ perm = verify_bucket_permission(s, rgw::IAM::s3PutLifecycleConfiguration);
if (!perm)
return -EACCES;
@@ -4608,8 +4839,20 @@ void RGWSetRequestPayment::execute()
int RGWInitMultipart::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_WRITE))
+ if (s->iam_policy) {
+ auto e = s->iam_policy->eval(s->env, *s->auth.identity,
+ rgw::IAM::s3PutObject,
+ rgw_obj(s->bucket, s->object));
+ if (e == Effect::Allow) {
+ return 0;
+ } else if (e == Effect::Deny) {
+ return -EACCES;
+ }
+ }
+
+ if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
return -EACCES;
+ }
return 0;
}
@@ -4713,8 +4956,20 @@ static int get_multipart_info(RGWRados *store, struct req_state *s,
int RGWCompleteMultipart::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_WRITE))
+ if (s->iam_policy) {
+ auto e = s->iam_policy->eval(s->env, *s->auth.identity,
+ rgw::IAM::s3PutObject,
+ rgw_obj(s->bucket, s->object));
+ if (e == Effect::Allow) {
+ return 0;
+ } else if (e == Effect::Deny) {
+ return -EACCES;
+ }
+ }
+
+ if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
return -EACCES;
+ }
return 0;
}
@@ -4959,8 +5214,20 @@ void RGWCompleteMultipart::execute()
int RGWAbortMultipart::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_WRITE))
+ if (s->iam_policy) {
+ auto e = s->iam_policy->eval(s->env, *s->auth.identity,
+ rgw::IAM::s3AbortMultipartUpload,
+ rgw_obj(s->bucket, s->object));
+ if (e == Effect::Allow) {
+ return 0;
+ } else if (e == Effect::Deny) {
+ return -EACCES;
+ }
+ }
+
+ if (!verify_bucket_permission_no_policy(s, RGW_PERM_WRITE)) {
return -EACCES;
+ }
return 0;
}
@@ -4996,7 +5263,7 @@ void RGWAbortMultipart::execute()
int RGWListMultipart::verify_permission()
{
- if (!verify_object_permission(s, RGW_PERM_READ))
+ if (!verify_object_permission(s, rgw::IAM::s3ListMultipartUploadParts))
return -EACCES;
return 0;
@@ -5030,7 +5297,8 @@ void RGWListMultipart::execute()
int RGWListBucketMultiparts::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_READ))
+ if (!verify_bucket_permission(s,
+ rgw::IAM::s3ListBucketMultiPartUploads))
return -EACCES;
return 0;
@@ -5102,7 +5370,8 @@ void RGWGetHealthCheck::execute()
int RGWDeleteMultiObj::verify_permission()
{
- if (!verify_bucket_permission(s, RGW_PERM_WRITE))
+ acl_allowed = verify_bucket_permission_no_policy(s, RGW_PERM_WRITE);
+ if (!acl_allowed && !s->iam_policy)
return -EACCES;
return 0;
@@ -5159,6 +5428,19 @@ void RGWDeleteMultiObj::execute()
iter != multi_delete->objects.end() && num_processed < max_to_delete;
++iter, num_processed++) {
rgw_obj obj(bucket, *iter);
+ if (s->iam_policy) {
+ auto e = s->iam_policy->eval(s->env,
+ *s->auth.identity,
+ iter->instance.empty() ?
+ rgw::IAM::s3DeleteObject :
+ rgw::IAM::s3DeleteObjectVersion,
+ obj);
+ if ((e == Effect::Deny) ||
+ (e == Effect::Pass && !acl_allowed)) {
+ send_partial_response(*iter, false, "", -EACCES);
+ continue;
+ }
+ }
obj_ctx->obj.set_atomic(obj);
@@ -5205,11 +5487,14 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
return false;
}
+ auto policy = get_iam_policy_from_attr(s->cct, store, battrs, binfo.bucket.tenant);
+
bucket_owner = bacl.get_owner();
/* We can use global user_acl because each BulkDelete request is allowed
* to work on entities from a single account only. */
- return verify_bucket_permission(s, s->user_acl.get(), &bacl, RGW_PERM_WRITE);
+ return verify_bucket_permission(s, binfo.bucket, s->user_acl.get(),
+ &bacl, policy, rgw::IAM::s3DeleteBucket);
}
bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path)
@@ -5422,7 +5707,28 @@ RGWBulkUploadOp::parse_path(const boost::string_ref& path)
}
}
- return boost::none;
+ return none;
+}
+
+std::pair<std::string, std::string>
+RGWBulkUploadOp::handle_upload_path(struct req_state *s)
+{
+ std::string bucket_path, file_prefix;
+ if (! s->init_state.url_bucket.empty()) {
+ file_prefix = bucket_path = s->init_state.url_bucket + "/";
+ if (! s->object.empty()) {
+ std::string& object_name = s->object.name;
+
+ /* As rgw_obj_key::empty() already verified emptiness of s->object.name,
+ * we can safely examine its last element. */
+ if (object_name.back() == '/') {
+ file_prefix.append(object_name);
+ } else {
+ file_prefix.append(object_name).append("/");
+ }
+ }
+ }
+ return std::make_pair(bucket_path, file_prefix);
}
int RGWBulkUploadOp::handle_dir_verify_permission()
@@ -5446,6 +5752,20 @@ int RGWBulkUploadOp::handle_dir_verify_permission()
return 0;
}
+static void forward_req_info(CephContext *cct, req_info& info, const std::string& bucket_name)
+{
+ /* the request of container or object level will contain bucket name.
+ * only at account level need to append the bucket name */
+ if (info.script_uri.find(bucket_name) != std::string::npos) {
+ return;
+ }
+
+ ldout(cct, 20) << "append the bucket: "<< bucket_name << " to req_info" << dendl;
+ info.script_uri.append("/").append(bucket_name);
+ info.request_uri_aws4 = info.request_uri = info.script_uri;
+ info.effective_uri = "/" + bucket_name;
+}
+
int RGWBulkUploadOp::handle_dir(const boost::string_ref path)
{
ldout(s->cct, 20) << "bulk upload: got directory=" << path << dendl;
@@ -5462,14 +5782,6 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path)
rgw_raw_obj obj(store->get_zone_params().domain_root,
rgw_make_bucket_entry_name(s->bucket_tenant, bucket_name));
- /* Swift API doesn't support location constraint. We're just checking here
- * whether creation is taking place in the master zone or not. */
- if (! store->get_zonegroup().is_master) {
- ldout(s->cct, 0) << "creating bucket in a non-master zone." << dendl;
- op_ret = -EINVAL;
- return op_ret;
- }
-
/* we need to make sure we read bucket info, it's not read before for this
* specific request */
RGWBucketInfo binfo;
@@ -5502,7 +5814,9 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path)
if (! store->is_meta_master()) {
JSONParser jp;
ceph::bufferlist in_data;
- op_ret = forward_request_to_master(s, nullptr, store, in_data, &jp);
+ req_info info = s->info;
+ forward_req_info(s->cct, info, bucket_name);
+ op_ret = forward_request_to_master(s, nullptr, store, in_data, &jp, &info);
if (op_ret < 0) {
return op_ret;
}
@@ -5535,7 +5849,6 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path)
op_ret = store->select_bucket_placement(*(s->user),
store->get_zonegroup().get_id(),
placement_rule,
- bucket,
&selected_placement_rule,
nullptr);
if (selected_placement_rule != binfo.placement_rule) {
@@ -5615,6 +5928,7 @@ int RGWBulkUploadOp::handle_dir(const boost::string_ref path)
bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo,
+ const rgw_obj& obj,
std::map<std::string, ceph::bufferlist>& battrs,
ACLOwner& bucket_owner /* out */)
{
@@ -5626,8 +5940,21 @@ bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo,
return false;
}
+ auto policy = get_iam_policy_from_attr(s->cct, store, battrs, binfo.bucket.tenant);
+
bucket_owner = bacl.get_owner();
- return verify_bucket_permission(s, s->user_acl.get(), &bacl, RGW_PERM_WRITE);
+ if (policy) {
+ auto e = policy->eval(s->env, *s->auth.identity,
+ rgw::IAM::s3PutObject, obj);
+ if (e == Effect::Allow) {
+ return true;
+ } else if (e == Effect::Deny) {
+ return false;
+ }
+ }
+
+ return verify_bucket_permission_no_policy(s, s->user_acl.get(),
+ &bacl, RGW_PERM_WRITE);
}
int RGWBulkUploadOp::handle_file(const boost::string_ref path,
@@ -5663,7 +5990,9 @@ int RGWBulkUploadOp::handle_file(const boost::string_ref path,
return op_ret;
}
- if (! handle_file_verify_permission(binfo, battrs, bowner)) {
+ if (! handle_file_verify_permission(binfo,
+ rgw_obj(binfo.bucket, object),
+ battrs, bowner)) {
ldout(s->cct, 20) << "bulk upload: object creation unauthorized" << dendl;
op_ret = -EACCES;
return op_ret;
@@ -5802,6 +6131,11 @@ void RGWBulkUploadOp::execute()
return;
}
+ /* Handling the $UPLOAD_PATH accordingly to the Swift's Bulk middleware. See:
+ * https://github.com/openstack/swift/blob/2.13.0/swift/common/middleware/bulk.py#L31-L41 */
+ std::string bucket_path, file_prefix;
+ std::tie(bucket_path, file_prefix) = handle_upload_path(s);
+
auto status = rgw::tar::StatusIndicator::create();
do {
op_ret = stream->get_exactly(rgw::tar::BLOCK_SIZE, buffer);
@@ -5826,25 +6160,28 @@ void RGWBulkUploadOp::execute()
case rgw::tar::FileType::NORMAL_FILE: {
ldout(s->cct, 2) << "bulk upload: handling regular file" << dendl;
+ const std::string filename = bucket_path.empty() ? header->get_filename().to_string() :
+ file_prefix + header->get_filename().to_string(); /* owning copy: a string_ref here would dangle on the concatenation temporary */
auto body = AlignedStreamGetter(0, header->get_filesize(),
rgw::tar::BLOCK_SIZE, *stream);
- op_ret = handle_file(header->get_filename(),
+ op_ret = handle_file(filename,
header->get_filesize(),
body);
if (! op_ret) {
/* Only regular files counts. */
num_created++;
} else {
- failures.emplace_back(op_ret, header->get_filename().to_string());
+ failures.emplace_back(op_ret, filename);
}
break;
}
case rgw::tar::FileType::DIRECTORY: {
ldout(s->cct, 2) << "bulk upload: handling regular directory" << dendl;
- op_ret = handle_dir(header->get_filename());
+ boost::string_ref dirname = bucket_path.empty() ? header->get_filename() : bucket_path;
+ op_ret = handle_dir(dirname);
if (op_ret < 0 && op_ret != -ERR_BUCKET_EXISTS) {
- failures.emplace_back(op_ret, header->get_filename().to_string());
+ failures.emplace_back(op_ret, dirname.to_string());
}
break;
}
@@ -5905,11 +6242,13 @@ ssize_t RGWBulkUploadOp::AlignedStreamGetter::get_exactly(const size_t want,
int RGWSetAttrs::verify_permission()
{
+ // This looks to be part of the RGW-NFS machinery and has no S3 or
+ // Swift equivalent.
bool perm;
if (!s->object.empty()) {
- perm = verify_object_permission(s, RGW_PERM_WRITE);
+ perm = verify_object_permission_no_policy(s, RGW_PERM_WRITE);
} else {
- perm = verify_bucket_permission(s, RGW_PERM_WRITE);
+ perm = verify_bucket_permission_no_policy(s, RGW_PERM_WRITE);
}
if (!perm)
return -EACCES;
@@ -5985,6 +6324,7 @@ int RGWHandler::init(RGWRados *_store,
int RGWHandler::do_init_permissions()
{
int ret = rgw_build_bucket_policies(store, s);
+ s->env = rgw_build_iam_environment(store, s);
if (ret < 0) {
ldout(s->cct, 10) << "read_permissions on " << s->bucket << " ret=" << ret << dendl;
@@ -6022,3 +6362,110 @@ int RGWHandler::error_handler(int err_no, string *error_content) {
// This is the do-nothing error handler
return err_no;
}
+
+
+void RGWPutBucketPolicy::send_response()
+{
+ if (op_ret) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ end_header(s);
+}
+
+int RGWPutBucketPolicy::verify_permission()
+{
+ if (!verify_bucket_permission(s, rgw::IAM::s3PutBucketPolicy)) {
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+int RGWPutBucketPolicy::get_params()
+{
+ const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+ // At some point when I have more time I want to make a version of
+ // rgw_rest_read_all_input that doesn't use malloc.
+ op_ret = rgw_rest_read_all_input(s, &data, &len, max_size, false);
+ // And throws exceptions.
+ return op_ret;
+}
+
+void RGWPutBucketPolicy::execute()
+{
+ op_ret = get_params();
+ if (op_ret < 0) {
+ return;
+ }
+
+ try {
+ Policy p(s->cct, s->bucket_tenant, bufferlist::static_from_mem(data, len)); /* parse first: throws on invalid policy text */
+ auto attrs = s->bucket_attrs;
+ attrs[RGW_ATTR_IAM_POLICY].clear(); /* attrs is a copy of the current bucket attrs: drop any stored policy so the new text replaces it instead of being appended */
+ attrs[RGW_ATTR_IAM_POLICY].append(p.text);
+ op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs,
+ &s->bucket_info.objv_tracker);
+ if (op_ret == -ECANCELED) {
+ op_ret = 0; /* lost a race, but it's ok because policies are immutable */
+ }
+ } catch (rgw::IAM::PolicyParseException& e) {
+ op_ret = -EINVAL;
+ }
+}
+
+void RGWGetBucketPolicy::send_response()
+{
+ if (op_ret) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ end_header(s);
+ dump_start(s);
+ rgw_flush_formatter(s, s->formatter);
+ dump_body(s, policy);
+}
+
+int RGWGetBucketPolicy::verify_permission()
+{
+ if (!verify_bucket_permission(s, rgw::IAM::s3GetBucketPolicy)) {
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+void RGWGetBucketPolicy::execute()
+{
+ auto attrs = s->bucket_attrs;
+ policy = attrs[RGW_ATTR_IAM_POLICY];
+}
+
+void RGWDeleteBucketPolicy::send_response()
+{
+ if (op_ret) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ end_header(s);
+}
+
+int RGWDeleteBucketPolicy::verify_permission()
+{
+ if (!verify_bucket_permission(s, rgw::IAM::s3DeleteBucketPolicy)) {
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+void RGWDeleteBucketPolicy::execute()
+{
+ auto attrs = s->bucket_attrs;
+ attrs.erase(RGW_ATTR_IAM_POLICY);
+ op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs,
+ &s->bucket_info.objv_tracker);
+ if (op_ret == -ECANCELED) {
+ op_ret = 0; /* lost a race, but it's ok because policies are immutable */
+ }
+}
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index 11c88a37fa2..1b4f02f1b7f 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -1,4 +1,4 @@
-// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/**
* All operations via the rados gateway are carried out by
@@ -22,6 +22,7 @@
#include <boost/optional.hpp>
#include <boost/utility/in_place_factory.hpp>
+#include <boost/function.hpp>
#include "common/armor.h"
#include "common/mime.h"
@@ -43,7 +44,6 @@
#include "include/assert.h"
-using namespace std;
using ceph::crypto::SHA1;
struct req_state;
@@ -93,6 +93,7 @@ public:
virtual int authorize() = 0;
virtual int postauth_init() = 0;
virtual int error_handler(int err_no, std::string* error_content);
+ virtual void dump(const string& code, const string& message) const {}
};
@@ -211,6 +212,7 @@ protected:
bool first_data;
uint64_t cur_ofs;
bufferlist waiting;
+ uint64_t action = 0;
int init_common();
public:
@@ -249,11 +251,13 @@ public:
int verify_permission() override;
void pre_exec() override;
void execute() override;
- int read_user_manifest_part(rgw_bucket& bucket,
- const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy *bucket_policy,
- off_t start_ofs,
- off_t end_ofs);
+ int read_user_manifest_part(
+ rgw_bucket& bucket,
+ const rgw_bucket_dir_entry& ent,
+ RGWAccessControlPolicy * const bucket_acl,
+ const boost::optional<rgw::IAM::Policy>& bucket_policy,
+ const off_t start_ofs,
+ const off_t end_ofs);
int handle_user_manifest(const char *prefix);
int handle_slo_manifest(bufferlist& bl);
@@ -427,9 +431,13 @@ protected:
boost::optional<std::pair<std::string, rgw_obj_key>>
parse_path(const boost::string_ref& path);
+
+ std::pair<std::string, std::string>
+ handle_upload_path(struct req_state *s);
bool handle_file_verify_permission(RGWBucketInfo& binfo,
- std::map<std::string, ceph::bufferlist>& battrs,
+ const rgw_obj& obj,
+ std::map<std::string, ceph::bufferlist>& battrs,
ACLOwner& bucket_owner /* out */);
int handle_file(boost::string_ref path,
size_t size,
@@ -926,6 +934,7 @@ protected:
string version_id;
bufferlist bl_aux;
map<string, string> crypt_http_responses;
+ string user_data;
boost::optional<ceph::real_time> delete_at;
@@ -1013,21 +1022,23 @@ protected:
const char *supplied_md5_b64;
const char *supplied_etag;
string etag;
- string boundary;
- bool data_pending;
- string content_type;
RGWAccessControlPolicy policy;
map<string, bufferlist> attrs;
boost::optional<ceph::real_time> delete_at;
+ /* Must be called after get_data() or the result is undefined. */
+ virtual std::string get_current_filename() const = 0;
+ virtual std::string get_current_content_type() const = 0;
+ virtual bool is_next_file_to_upload() {
+ return false;
+ }
public:
RGWPostObj() : min_len(0),
max_len(LLONG_MAX),
len(0),
ofs(0),
supplied_md5_b64(nullptr),
- supplied_etag(nullptr),
- data_pending(false) {
+ supplied_etag(nullptr) {
}
void emplace_attr(std::string&& key, buffer::list&& bl) {
@@ -1048,9 +1059,9 @@ public:
return 0;
}
virtual int get_params() = 0;
- virtual int get_data(bufferlist& bl) = 0;
+ virtual int get_data(ceph::bufferlist& bl, bool& again) = 0;
void send_response() override = 0;
- const string name() override { return "post_obj"; }
+ const std::string name() override { return "post_obj"; }
RGWOpType get_type() override { return RGW_OP_POST_OBJ; }
uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
};
@@ -1098,6 +1109,7 @@ protected:
map<string, buffer::list> attrs;
set<string> rmattr_names;
bool has_policy, has_cors;
+ uint32_t policy_rw_mask;
RGWAccessControlPolicy policy;
RGWCORSConfiguration cors_config;
string placement_rule;
@@ -1105,7 +1117,7 @@ protected:
public:
RGWPutMetadataBucket()
- : has_policy(false), has_cors(false)
+ : has_policy(false), has_cors(false), policy_rw_mask(0)
{}
void emplace_attr(std::string&& key, buffer::list&& bl) {
@@ -1688,6 +1700,7 @@ protected:
rgw_bucket bucket;
bool quiet;
bool status_dumped;
+ bool acl_allowed = false;
public:
RGWDeleteMultiObj() {
@@ -1725,7 +1738,9 @@ public:
extern int rgw_build_bucket_policies(RGWRados* store, struct req_state* s);
extern int rgw_build_object_policies(RGWRados *store, struct req_state *s,
- bool prefetch_data);
+ bool prefetch_data);
+extern rgw::IAM::Environment rgw_build_iam_environment(RGWRados* store,
+ struct req_state* s);
static inline int put_data_and_throttle(RGWPutObjDataProcessor *processor,
bufferlist& data, off_t ofs,
@@ -1935,6 +1950,66 @@ public:
virtual uint32_t op_mask() { return RGW_OP_TYPE_READ; }
};
+// Op for writing a bucket policy: type RGW_OP_PUT_BUCKET_POLICY, op mask
+// RGW_OP_TYPE_WRITE, name "put_bucket_policy".
+class RGWPutBucketPolicy : public RGWOp {
+  // Presumably the byte length of `data` (confirm against get_params()).
+  // Zero-initialized so it never holds an indeterminate value.
+  int len = 0;
+  // Owning raw buffer, released with free() in the destructor; it therefore
+  // has to come from the malloc() family or stay nullptr.
+  char *data = nullptr;
+public:
+  RGWPutBucketPolicy() = default;
+  // `data` is owned through a raw pointer, so a memberwise copy would end in
+  // a double free(); copying is disabled.
+  RGWPutBucketPolicy(const RGWPutBucketPolicy&) = delete;
+  RGWPutBucketPolicy& operator=(const RGWPutBucketPolicy&) = delete;
+  ~RGWPutBucketPolicy() {
+    free(data);  // free(nullptr) is a no-op, no guard needed
+  }
+  void send_response() override;
+  int verify_permission() override;
+  uint32_t op_mask() override {
+    return RGW_OP_TYPE_WRITE;
+  }
+  void execute() override;
+  int get_params();
+  const std::string name() override {
+    return "put_bucket_policy";
+  }
+  RGWOpType get_type() override {
+    return RGW_OP_PUT_BUCKET_POLICY;
+  }
+};
+
+// Op for reading a bucket policy: type RGW_OP_GET_BUCKET_POLICY, op mask
+// RGW_OP_TYPE_READ, name "get_bucket_policy".
+class RGWGetBucketPolicy : public RGWOp {
+  buffer::list policy;  // policy document held by this op
+
+public:
+  RGWGetBucketPolicy() = default;
+
+  // Request pipeline hooks, defined out of line.
+  int verify_permission() override;
+  void execute() override;
+  void send_response() override;
+
+  // Static description of the op.
+  const std::string name() override { return "get_bucket_policy"; }
+  RGWOpType get_type() override { return RGW_OP_GET_BUCKET_POLICY; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+// Op for removing a bucket policy: type RGW_OP_DELETE_BUCKET_POLICY, op mask
+// RGW_OP_TYPE_WRITE, name "delete_bucket_policy".
+class RGWDeleteBucketPolicy : public RGWOp {
+public:
+  RGWDeleteBucketPolicy() = default;
+
+  // Request pipeline hooks, defined out of line.
+  int verify_permission() override;
+  void execute() override;
+  void send_response() override;
+
+  // NOTE(review): no definition of get_params() for this class is visible
+  // alongside the other member definitions; confirm one exists.
+  int get_params();
+
+  // Static description of the op.
+  const std::string name() override { return "delete_bucket_policy"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_BUCKET_POLICY; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
#endif /* CEPH_RGW_OP_H */
diff --git a/src/rgw/rgw_period_history.cc b/src/rgw/rgw_period_history.cc
index eff0e78ad9d..895700f6897 100644
--- a/src/rgw/rgw_period_history.cc
+++ b/src/rgw/rgw_period_history.cc
@@ -69,6 +69,15 @@ bool Cursor::has_next() const
return epoch < history->get_newest_epoch();
}
+// Cursors compare equal when both their history and their epoch match.
+bool operator==(const Cursor& lhs, const Cursor& rhs)
+{
+  if (!(lhs.history == rhs.history)) {
+    return false;
+  }
+  return lhs.epoch == rhs.epoch;
+}
+
+// Inequality is defined as the negation of operator==.
+bool operator!=(const Cursor& lhs, const Cursor& rhs)
+{
+  const bool same = (lhs == rhs);
+  return !same;
+}
class RGWPeriodHistory::Impl final {
public:
diff --git a/src/rgw/rgw_period_history.h b/src/rgw/rgw_period_history.h
index 9541493aa14..0796c6116b5 100644
--- a/src/rgw/rgw_period_history.h
+++ b/src/rgw/rgw_period_history.h
@@ -75,6 +75,9 @@ class RGWPeriodHistory final {
void prev() { epoch--; }
void next() { epoch++; }
+ friend bool operator==(const Cursor& lhs, const Cursor& rhs);
+ friend bool operator!=(const Cursor& lhs, const Cursor& rhs);
+
private:
// private constructors for RGWPeriodHistory
friend class RGWPeriodHistory::Impl;
diff --git a/src/rgw/rgw_period_pusher.cc b/src/rgw/rgw_period_pusher.cc
index d6b2eabec39..75b454cf1c6 100644
--- a/src/rgw/rgw_period_pusher.cc
+++ b/src/rgw/rgw_period_pusher.cc
@@ -135,10 +135,11 @@ class RGWPeriodPusher::CRThread {
std::map<std::string, RGWRESTConn>&& conns)
: coroutines(cct, NULL),
http(cct, coroutines.get_completion_mgr()),
- push_all(new PushAllCR(cct, &http, std::move(period), std::move(conns))),
- thread([this] { coroutines.run(push_all.get()); })
+ push_all(new PushAllCR(cct, &http, std::move(period), std::move(conns)))
{
http.set_threaded();
+ // must spawn the CR thread after set_threaded
+ thread = std::thread([this] { coroutines.run(push_all.get()); });
}
~CRThread()
{
diff --git a/src/rgw/rgw_process.h b/src/rgw/rgw_process.h
index 83c59a4cc37..005f2db6fef 100644
--- a/src/rgw/rgw_process.h
+++ b/src/rgw/rgw_process.h
@@ -17,6 +17,8 @@
#include "common/WorkQueue.h"
#include "common/Throttle.h"
+#include <atomic>
+
#if !defined(dout_subsys)
#define dout_subsys ceph_subsys_rgw
#define def_dout_subsys
@@ -182,7 +184,7 @@ public:
void checkpoint();
void handle_request(RGWRequest* req) override;
void gen_request(const string& method, const string& resource,
- int content_length, atomic_t* fail_flag);
+ int content_length, std::atomic<int64_t>* fail_flag);
void set_access_key(RGWAccessKey& key) { access_key = key; }
};
diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc
index 02347ff743d..6a61400a110 100644
--- a/src/rgw/rgw_quota.cc
+++ b/src/rgw/rgw_quota.cc
@@ -26,6 +26,8 @@
#include "rgw_bucket.h"
#include "rgw_user.h"
+#include <atomic>
+
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rgw
@@ -410,7 +412,7 @@ void UserAsyncRefreshHandler::handle_response(int r)
}
class RGWUserStatsCache : public RGWQuotaCache<rgw_user> {
- atomic_t down_flag;
+ std::atomic<bool> down_flag = { false };
RWLock rwlock;
map<rgw_bucket, rgw_user> modified_buckets;
@@ -569,11 +571,11 @@ public:
}
bool going_down() {
- return (down_flag.read() != 0);
+ return down_flag;
}
void stop() {
- down_flag.set(1);
+ down_flag = true;
rwlock.get_write();
stop_thread(&buckets_sync_thread);
rwlock.unlock();
diff --git a/src/rgw/rgw_quota.h b/src/rgw/rgw_quota.h
index d55dcaff952..9291434634c 100644
--- a/src/rgw/rgw_quota.h
+++ b/src/rgw/rgw_quota.h
@@ -15,11 +15,11 @@
#ifndef CEPH_RGW_QUOTA_H
#define CEPH_RGW_QUOTA_H
-
#include "include/utime.h"
-#include "include/atomic.h"
#include "common/lru_map.h"
+#include <atomic>
+
static inline int64_t rgw_rounded_kb(int64_t bytes)
{
return (bytes + 1023) / 1024;
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index eb4fbeda6a2..f3fb6dc2ea3 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -55,6 +55,7 @@ using namespace librados;
#include <string>
#include <iostream>
#include <vector>
+#include <atomic>
#include <list>
#include <map>
#include "auth/Crypto.h" // get_random_bytes()
@@ -71,14 +72,13 @@ using namespace librados;
#include "compressor/Compressor.h"
+#include <atomic>
+
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rgw
using namespace std;
-static RGWCache<RGWRados> cached_rados_provider;
-static RGWRados rados_provider;
-
static string notify_oid_prefix = "notify";
static string *notify_oids = NULL;
static string shadow_ns = "shadow";
@@ -1428,33 +1428,62 @@ void RGWPeriod::fork()
realm_epoch++;
}
-int RGWPeriod::update_sync_status()
+static int read_sync_status(RGWRados *store, rgw_meta_sync_status *sync_status)
{
- // must be new period's master zone to write sync status
- if (master_zone != store->get_zone_params().get_id()) {
- ldout(cct, 0) << "my zone " << store->get_zone_params().get_id()
- << " is not period's master zone " << master_zone << dendl;
- return -EINVAL;
+ // initialize a sync status manager to read the status
+ RGWMetaSyncStatusManager mgr(store, store->get_async_rados());
+ int r = mgr.init();
+ if (r < 0) {
+ return r;
}
+ r = mgr.read_sync_status(sync_status);
+ mgr.stop();
+ return r;
+}
- auto mdlog = store->meta_mgr->get_log(get_id());
- const auto num_shards = cct->_conf->rgw_md_log_max_shards;
+int RGWPeriod::update_sync_status(const RGWPeriod &current_period,
+ std::ostream& error_stream,
+ bool force_if_stale)
+{
+ rgw_meta_sync_status status;
+ int r = read_sync_status(store, &status);
+ if (r < 0) {
+ ldout(cct, 0) << "period failed to read sync status: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
std::vector<std::string> markers;
- markers.reserve(num_shards);
- // gather the markers for each shard
- // TODO: use coroutines to read them in parallel
- for (int i = 0; i < num_shards; i++) {
- RGWMetadataLogInfo info;
- int r = mdlog->get_info(i, &info);
- if (r < 0) {
- ldout(cct, 0) << "period failed to get metadata log info for shard " << i
- << ": " << cpp_strerror(-r) << dendl;
- return r;
+ const auto current_epoch = current_period.get_realm_epoch();
+ if (current_epoch != status.sync_info.realm_epoch) {
+ // no sync status markers for the current period
+ assert(current_epoch > status.sync_info.realm_epoch);
+ const int behind = current_epoch - status.sync_info.realm_epoch;
+ if (!force_if_stale && current_epoch > 1) {
+ error_stream << "ERROR: This zone is " << behind << " period(s) behind "
+ "the current master zone in metadata sync. If this zone is promoted "
+ "to master, any metadata changes during that time are likely to "
+ "be lost.\n"
+ "Waiting for this zone to catch up on metadata sync (see "
+ "'radosgw-admin sync status') is recommended.\n"
+ "To promote this zone to master anyway, add the flag "
+ "--yes-i-really-mean-it." << std::endl;
+ return -EINVAL;
+ }
+ // empty sync status markers - other zones will skip this period during
+ // incremental metadata sync
+ markers.resize(status.sync_info.num_shards);
+ } else {
+ markers.reserve(status.sync_info.num_shards);
+ for (auto& i : status.sync_markers) {
+ auto& marker = i.second;
+ // filter out markers from other periods
+ if (marker.realm_epoch != current_epoch) {
+ marker.marker.clear();
+ }
+ markers.emplace_back(std::move(marker.marker));
}
- ldout(cct, 15) << "got shard " << i << " marker " << info.marker << dendl;
- markers.emplace_back(std::move(info.marker));
}
std::swap(sync_status, markers);
@@ -1462,7 +1491,7 @@ int RGWPeriod::update_sync_status()
}
int RGWPeriod::commit(RGWRealm& realm, const RGWPeriod& current_period,
- std::ostream& error_stream)
+ std::ostream& error_stream, bool force_if_stale)
{
ldout(cct, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl;
// gateway must be in the master zone to commit
@@ -1492,7 +1521,7 @@ int RGWPeriod::commit(RGWRealm& realm, const RGWPeriod& current_period,
// did the master zone change?
if (master_zone != current_period.get_master_zone()) {
// store the current metadata sync status in the period
- int r = update_sync_status();
+ int r = update_sync_status(current_period, error_stream, force_if_stale);
if (r < 0) {
ldout(cct, 0) << "failed to update metadata sync status: "
<< cpp_strerror(-r) << dendl;
@@ -2356,9 +2385,9 @@ void RGWObjVersionTracker::generate_new_write_ver(CephContext *cct)
int RGWPutObjProcessor::complete(size_t accounted_size, const string& etag,
real_time *mtime, real_time set_mtime,
map<string, bufferlist>& attrs, real_time delete_at,
- const char *if_match, const char *if_nomatch)
+ const char *if_match, const char *if_nomatch, const string *user_data)
{
- int r = do_complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, if_match, if_nomatch);
+ int r = do_complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, if_match, if_nomatch, user_data);
if (r < 0)
return r;
@@ -2531,6 +2560,9 @@ int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phan
*pobj = cur_obj;
+ if (!bl.length())
+ return 0;
+
return RGWPutObjProcessor_Aio::handle_obj_data(cur_obj, bl, ofs - cur_part_ofs, ofs, phandle, exclusive);
}
@@ -2700,7 +2732,7 @@ int RGWPutObjProcessor_Atomic::do_complete(size_t accounted_size, const string&
map<string, bufferlist>& attrs,
real_time delete_at,
const char *if_match,
- const char *if_nomatch) {
+ const char *if_nomatch, const string *user_data) {
int r = complete_writing_data();
if (r < 0)
return r;
@@ -2725,6 +2757,8 @@ int RGWPutObjProcessor_Atomic::do_complete(size_t accounted_size, const string&
obj_op.meta.flags = PUT_OBJ_CREATE;
obj_op.meta.olh_epoch = olh_epoch;
obj_op.meta.delete_at = delete_at;
+ obj_op.meta.user_data = user_data;
+
r = obj_op.write_meta(obj_len, accounted_size, attrs);
if (r < 0) {
return r;
@@ -2927,7 +2961,7 @@ protected:
CephContext *cct;
RGWRados *store;
- atomic_t down_flag;
+ std::atomic<bool> down_flag = { false };
string thread_name;
@@ -2943,7 +2977,8 @@ public:
virtual int init() { return 0; }
virtual int process() = 0;
- bool going_down() { return down_flag.read() != 0; }
+ bool going_down() { return down_flag; }
+
void start();
void stop();
};
@@ -2956,7 +2991,7 @@ void RGWRadosThread::start()
void RGWRadosThread::stop()
{
- down_flag.set(1);
+ down_flag = true;
stop_process();
if (worker) {
worker->stop();
@@ -3194,9 +3229,20 @@ public:
return http.set_threaded();
}
int process() override {
- crs.run(create_data_log_trim_cr(store, &http,
- cct->_conf->rgw_data_log_num_shards,
- trim_interval));
+ list<RGWCoroutinesStack*> stacks;
+ auto meta = new RGWCoroutinesStack(store->ctx(), &crs);
+ meta->call(create_meta_log_trim_cr(store, &http,
+ cct->_conf->rgw_md_log_max_shards,
+ trim_interval));
+ stacks.push_back(meta);
+
+ auto data = new RGWCoroutinesStack(store->ctx(), &crs);
+ data->call(create_data_log_trim_cr(store, &http,
+ cct->_conf->rgw_data_log_num_shards,
+ trim_interval));
+ stacks.push_back(data);
+
+ crs.run(stacks);
return 0;
}
};
@@ -4129,7 +4175,8 @@ int RGWRados::init_complete()
/* no point of running sync thread if we don't have a master zone configured
or there is no rest_master_conn */
- if (get_zonegroup().master_zone.empty() || !rest_master_conn) {
+ if (get_zonegroup().master_zone.empty() || !rest_master_conn
+ || current_period.get_id().empty()) {
run_sync_thread = false;
}
@@ -5182,14 +5229,14 @@ int RGWRados::Bucket::List::list_objects(int max, vector<rgw_bucket_dir_entry> *
if (delim_pos >= 0) {
string s = cur_marker.name.substr(0, delim_pos);
s.append(bigger_than_delim);
- cur_marker.set(s);
+ cur_marker = s;
}
}
string skip_after_delim;
while (truncated && count <= max) {
if (skip_after_delim > cur_marker.name) {
- cur_marker.set(skip_after_delim);
+ cur_marker = skip_after_delim;
ldout(cct, 20) << "setting cur_marker=" << cur_marker.name << "[" << cur_marker.instance << "]" << dendl;
}
std::map<string, rgw_bucket_dir_entry> ent_map;
@@ -5370,7 +5417,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
for (int i = 0; i < MAX_CREATE_RETRIES; i++) {
int ret = 0;
- ret = select_bucket_placement(owner, zonegroup_id, placement_rule, bucket,
+ ret = select_bucket_placement(owner, zonegroup_id, placement_rule,
&selected_placement_rule_name, &rule_info);
if (ret < 0)
return ret;
@@ -5414,7 +5461,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
info.quota = *pquota_info;
}
- int r = init_bucket_index(info, bucket_index_max_shards);
+ int r = init_bucket_index(info, info.num_shards);
if (r < 0) {
return r;
}
@@ -5465,7 +5512,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
}
int RGWRados::select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& request_rule,
- rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info)
+ string *pselected_rule_name, RGWZonePlacementInfo *rule_info)
{
/* first check that rule exists within the specific zonegroup */
@@ -5506,16 +5553,16 @@ int RGWRados::select_new_bucket_location(RGWUserInfo& user_info, const string& z
if (pselected_rule_name)
*pselected_rule_name = rule;
- return select_bucket_location_by_rule(rule, bucket, rule_info);
+ return select_bucket_location_by_rule(rule, rule_info);
}
-int RGWRados::select_bucket_location_by_rule(const string& location_rule, rgw_bucket& bucket, RGWZonePlacementInfo *rule_info)
+int RGWRados::select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info)
{
if (location_rule.empty()) {
/* we can only reach here if we're trying to set a bucket location from a bucket
* created on a different zone, using a legacy / default pool configuration
*/
- return select_legacy_bucket_placement(bucket, rule_info);
+ return select_legacy_bucket_placement(rule_info);
}
/*
@@ -5546,21 +5593,21 @@ int RGWRados::select_bucket_location_by_rule(const string& location_rule, rgw_bu
}
int RGWRados::select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& placement_rule,
- rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info)
+ string *pselected_rule_name, RGWZonePlacementInfo *rule_info)
{
if (!get_zone_params().placement_pools.empty()) {
return select_new_bucket_location(user_info, zonegroup_id, placement_rule,
- bucket, pselected_rule_name, rule_info);
+ pselected_rule_name, rule_info);
}
if (pselected_rule_name) {
pselected_rule_name->clear();
}
- return select_legacy_bucket_placement(bucket, rule_info);
+ return select_legacy_bucket_placement(rule_info);
}
-int RGWRados::select_legacy_bucket_placement(rgw_bucket& bucket, RGWZonePlacementInfo *rule_info)
+int RGWRados::select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info)
{
bufferlist map_bl;
map<string, bufferlist> m;
@@ -6224,7 +6271,6 @@ int RGWRados::swift_versioning_copy(RGWObjectCtx& obj_ctx,
NULL, /* string *version_id */
NULL, /* string *ptag */
NULL, /* string *petag */
- NULL, /* struct rgw_err *err */
NULL, /* void (*progress_cb)(off_t, void *) */
NULL); /* void *progress_data */
if (r == -ECANCELED || r == -ENOENT) {
@@ -6314,7 +6360,6 @@ int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx,
nullptr, /* string *version_id */
nullptr, /* string *ptag */
nullptr, /* string *petag */
- nullptr, /* struct rgw_err *err */
nullptr, /* void (*progress_cb)(off_t, void *) */
nullptr); /* void *progress_data */
if (ret == -ECANCELED || ret == -ENOENT) {
@@ -6504,7 +6549,7 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
r = index_op->complete(poolid, epoch, size, accounted_size,
meta.set_mtime, etag, content_type, &acl_bl,
- meta.category, meta.remove_objs);
+ meta.category, meta.remove_objs, meta.user_data);
if (r < 0)
goto done_cancel;
@@ -6674,7 +6719,8 @@ int RGWRados::put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, real_time *mt
}
int RGWRados::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
- off_t ofs, bool exclusive)
+ off_t ofs, bool exclusive,
+ RGWObjVersionTracker *objv_tracker)
{
rgw_rados_ref ref;
rgw_pool pool;
@@ -6688,6 +6734,9 @@ int RGWRados::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
if (exclusive)
op.create(true);
+ if (objv_tracker) {
+ objv_tracker->prepare_op_for_write(&op);
+ }
if (ofs == -1) {
op.write_full(bl);
} else {
@@ -6697,6 +6746,9 @@ int RGWRados::put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
if (r < 0)
return r;
+ if (objv_tracker) {
+ objv_tracker->apply_write();
+ }
return 0;
}
@@ -6952,7 +7004,7 @@ int RGWRados::rewrite_obj(RGWBucketInfo& dest_bucket_info, rgw_obj& obj)
}
return copy_obj_data(rctx, dest_bucket_info, read_op, obj_size - 1, obj, obj, max_chunk_size, NULL, mtime, attrset,
- RGW_OBJ_CATEGORY_MAIN, 0, real_time(), NULL, NULL, NULL, NULL);
+ RGW_OBJ_CATEGORY_MAIN, 0, real_time(), NULL, NULL, NULL);
}
struct obj_time_weight {
@@ -7171,7 +7223,6 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx,
string *version_id,
string *ptag,
ceph::buffer::list *petag,
- struct rgw_err *err,
void (*progress_cb)(off_t, void *),
void *progress_data)
{
@@ -7469,7 +7520,6 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
string *version_id,
string *ptag,
ceph::buffer::list *petag,
- struct rgw_err *err,
void (*progress_cb)(off_t, void *),
void *progress_data)
{
@@ -7499,7 +7549,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
dest_obj, src_obj, dest_bucket_info, src_bucket_info, src_mtime, mtime, mod_ptr,
unmod_ptr, high_precision_time,
if_match, if_nomatch, attrs_mod, copy_if_newer, attrs, category,
- olh_epoch, delete_at, version_id, ptag, petag, err, progress_cb, progress_data);
+ olh_epoch, delete_at, version_id, ptag, petag, progress_cb, progress_data);
}
map<string, bufferlist> src_attrs;
@@ -7514,7 +7564,6 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
read_op.params.attrs = &src_attrs;
read_op.params.lastmod = src_mtime;
read_op.params.obj_size = &obj_size;
- read_op.params.perr = err;
ret = read_op.prepare();
if (ret < 0) {
@@ -7594,7 +7643,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
if (copy_data) { /* refcounting tail wouldn't work here, just copy the data */
return copy_obj_data(obj_ctx, dest_bucket_info, read_op, obj_size - 1, dest_obj, src_obj,
max_chunk_size, mtime, real_time(), attrs, category, olh_epoch, delete_at,
- version_id, ptag, petag, err);
+ version_id, ptag, petag);
}
RGWObjManifest::obj_iterator miter = astate->manifest.obj_begin();
@@ -7730,8 +7779,7 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx,
real_time delete_at,
string *version_id,
string *ptag,
- ceph::buffer::list *petag,
- struct rgw_err *err)
+ ceph::buffer::list *petag)
{
bufferlist first_chunk;
RGWObjManifest manifest;
@@ -9460,7 +9508,7 @@ int RGWRados::Bucket::UpdateIndex::complete(int64_t poolid, uint64_t epoch,
const string& content_type,
bufferlist *acl_bl,
RGWObjCategory category,
- list<rgw_obj_index_key> *remove_objs)
+ list<rgw_obj_index_key> *remove_objs, const string *user_data)
{
if (blind) {
return 0;
@@ -9479,6 +9527,9 @@ int RGWRados::Bucket::UpdateIndex::complete(int64_t poolid, uint64_t epoch,
ent.meta.accounted_size = accounted_size;
ent.meta.mtime = ut;
ent.meta.etag = etag;
+ if (user_data)
+ ent.meta.user_data = *user_data;
+
ACLOwner owner;
if (acl_bl && acl_bl->length()) {
int ret = store->decode_policy(*acl_bl, &owner);
@@ -9760,8 +9811,8 @@ struct get_obj_data : public RefCountedObject {
Mutex data_lock;
list<get_obj_aio_data> aio_data;
RGWGetDataCB *client_cb;
- atomic_t cancelled;
- atomic_t err_code;
+ std::atomic<bool> cancelled = { false };
+ std::atomic<int64_t> err_code = { 0 };
Throttle throttle;
list<bufferlist> read_list;
@@ -9773,16 +9824,16 @@ struct get_obj_data : public RefCountedObject {
throttle(cct, "get_obj_data", cct->_conf->rgw_get_obj_window_size, false) {}
~get_obj_data() override { }
void set_cancelled(int r) {
- cancelled.set(1);
- err_code.set(r);
+ cancelled = true;
+ err_code = r;
}
bool is_cancelled() {
- return cancelled.read() == 1;
+ return cancelled;
}
int get_err_code() {
- return err_code.read();
+ return err_code;
}
int wait_next_io(bool *done) {
@@ -11626,7 +11677,7 @@ int RGWRados::pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_
if (filter && !filter->filter(oid, oid))
continue;
- e.key.set(oid);
+ e.key = oid;
objs.push_back(e);
}
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index e4947298e62..e60ccbdf0be 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1787,7 +1787,8 @@ class RGWPeriod
const string get_period_oid_prefix();
// gather the metadata sync status for each shard; only for use on master zone
- int update_sync_status();
+ int update_sync_status(const RGWPeriod &current_period,
+ std::ostream& error_stream, bool force_if_stale);
public:
RGWPeriod() : epoch(0), cct(NULL), store(NULL) {}
@@ -1860,7 +1861,7 @@ public:
// commit a staging period; only for use on master zone
int commit(RGWRealm& realm, const RGWPeriod &current_period,
- std::ostream& error_stream);
+ std::ostream& error_stream, bool force_if_stale = false);
void encode(bufferlist& bl) const {
ENCODE_START(1, 1, bl);
@@ -2198,7 +2199,7 @@ class RGWRados
void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
- atomic64_t max_req_id;
+ std::atomic<int64_t> max_req_id = { 0 };
Mutex lock;
Mutex watchers_lock;
SafeTimer *timer;
@@ -2297,7 +2298,7 @@ protected:
RGWPeriod current_period;
public:
- RGWRados() : max_req_id(0), lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL),
+ RGWRados() : lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL),
gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
run_sync_thread(false), async_rados(nullptr), meta_notifier(NULL),
data_notifier(NULL), meta_sync_processor_thread(NULL),
@@ -2319,7 +2320,7 @@ public:
meta_mgr(NULL), data_log(NULL) {}
uint64_t get_new_req_id() {
- return max_req_id.inc();
+ return ++max_req_id;
}
librados::IoCtx* get_lc_pool_ctx() {
@@ -2528,11 +2529,11 @@ public:
*/
int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
int select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
- rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
- int select_legacy_bucket_placement(rgw_bucket& bucket, RGWZonePlacementInfo *rule_info);
+ string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
+ int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
int select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
- rgw_bucket& bucket, string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
- int select_bucket_location_by_rule(const string& location_rule, rgw_bucket& bucket, RGWZonePlacementInfo *rule_info);
+ string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
+ int select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info);
void create_bucket_id(string *bucket_id);
bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool);
@@ -2594,9 +2595,8 @@ public:
ceph::real_time *lastmod;
uint64_t *obj_size;
map<string, bufferlist> *attrs;
- struct rgw_err *perr;
- StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL), perr(NULL) {}
+ StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
} stat_params;
struct ReadParams {
@@ -2707,9 +2707,8 @@ public:
ceph::real_time *lastmod;
uint64_t *obj_size;
map<string, bufferlist> *attrs;
- struct rgw_err *perr;
- Params() : lastmod(NULL), obj_size(NULL), attrs(NULL), perr(NULL) {}
+ Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
} params;
explicit Read(RGWRados::Object *_source) : source(_source) {}
@@ -2740,10 +2739,11 @@ public:
uint64_t olh_epoch;
ceph::real_time delete_at;
bool canceled;
+ const string *user_data;
MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
remove_objs(NULL), category(RGW_OBJ_CATEGORY_MAIN), flags(0),
- if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false) {}
+ if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false), user_data(nullptr) {}
} meta;
explicit Write(RGWRados::Object *_target) : target(_target) {}
@@ -2876,7 +2876,7 @@ public:
uint64_t accounted_size, ceph::real_time& ut,
const string& etag, const string& content_type,
bufferlist *acl_bl, RGWObjCategory category,
- list<rgw_obj_index_key> *remove_objs);
+ list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr);
int complete_del(int64_t poolid, uint64_t epoch,
ceph::real_time& removed_mtime, /* mtime of removed object */
list<rgw_obj_index_key> *remove_objs);
@@ -2922,7 +2922,8 @@ public:
ceph::real_time set_mtime /* 0 for don't set */);
virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
- off_t ofs, bool exclusive);
+ off_t ofs, bool exclusive,
+ RGWObjVersionTracker *objv_tracker = nullptr);
int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
off_t ofs, bool exclusive, void **handle);
@@ -3020,7 +3021,6 @@ public:
string *version_id,
string *ptag,
ceph::buffer::list *petag,
- struct rgw_err *err,
void (*progress_cb)(off_t, void *),
void *progress_data);
/**
@@ -3035,7 +3035,6 @@ public:
* parameter, source object attributes are not copied;
* ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
* are overwritten by values contained in attrs parameter.
- * err: stores any errors resulting from the get of the original object
* Returns: 0 on success, -ERR# otherwise.
*/
int copy_obj(RGWObjectCtx& obj_ctx,
@@ -3064,7 +3063,6 @@ public:
string *version_id,
string *ptag,
ceph::buffer::list *petag,
- struct rgw_err *err,
void (*progress_cb)(off_t, void *),
void *progress_data);
@@ -3082,8 +3080,7 @@ public:
ceph::real_time delete_at,
string *version_id,
string *ptag,
- ceph::buffer::list *petag,
- struct rgw_err *err);
+ ceph::buffer::list *petag);
int check_bucket_empty(RGWBucketInfo& bucket_info);
@@ -3655,7 +3652,7 @@ protected:
virtual int do_complete(size_t accounted_size, const string& etag,
ceph::real_time *mtime, ceph::real_time set_mtime,
map<string, bufferlist>& attrs, ceph::real_time delete_at,
- const char *if_match, const char *if_nomatch) = 0;
+ const char *if_match, const char *if_nomatch, const string *user_data) = 0;
public:
RGWPutObjProcessor(RGWObjectCtx& _obj_ctx, RGWBucketInfo& _bi) : store(NULL),
@@ -3672,7 +3669,7 @@ public:
int complete(size_t accounted_size, const string& etag,
ceph::real_time *mtime, ceph::real_time set_mtime,
map<string, bufferlist>& attrs, ceph::real_time delete_at,
- const char *if_match = NULL, const char *if_nomatch = NULL);
+ const char *if_match = NULL, const char *if_nomatch = NULL, const string *user_data = nullptr);
CephContext *ctx();
@@ -3750,7 +3747,7 @@ protected:
int do_complete(size_t accounted_size, const string& etag,
ceph::real_time *mtime, ceph::real_time set_mtime,
map<string, bufferlist>& attrs, ceph::real_time delete_at,
- const char *if_match, const char *if_nomatch) override;
+ const char *if_match, const char *if_nomatch, const string *user_data) override;
int prepare_next_part(off_t ofs);
int complete_parts();
@@ -3865,7 +3862,7 @@ protected:
int do_complete(size_t accounted_size, const string& etag,
ceph::real_time *mtime, ceph::real_time set_mtime,
map<string, bufferlist>& attrs, ceph::real_time delete_at,
- const char *if_match, const char *if_nomatch) override;
+ const char *if_match, const char *if_nomatch, const string *user_data) override;
public:
bool immutable_head() { return true; }
RGWPutObjProcessor_Multipart(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, uint64_t _p, req_state *_s) :
diff --git a/src/rgw/rgw_realm_reloader.cc b/src/rgw/rgw_realm_reloader.cc
index a1d17831798..8bd65b45d9f 100644
--- a/src/rgw/rgw_realm_reloader.cc
+++ b/src/rgw/rgw_realm_reloader.cc
@@ -64,12 +64,10 @@ void RGWRealmReloader::handle_notify(RGWRealmNotify type,
reload_scheduled = new C_Reload(this);
cond.SignalOne(); // wake reload() if it blocked on a bad configuration
- // schedule reload() with a delay so we can batch up changes
- auto delay = cct->_conf->rgw_realm_reconfigure_delay;
- timer.add_event_after(delay, reload_scheduled);
+ // schedule reload() without delay
+ timer.add_event_after(0, reload_scheduled);
- ldout(cct, 4) << "Notification on realm, reconfiguration scheduled in "
- << delay << 's' << dendl;
+ ldout(cct, 4) << "Notification on realm, reconfiguration scheduled" << dendl;
}
void RGWRealmReloader::reload()
diff --git a/src/rgw/rgw_realm_reloader.h b/src/rgw/rgw_realm_reloader.h
index 3de54b1aabc..e4e3a436342 100644
--- a/src/rgw/rgw_realm_reloader.h
+++ b/src/rgw/rgw_realm_reloader.h
@@ -10,8 +10,8 @@
class RGWRados;
/**
- * RGWRealmReloader responds to notifications by recreating RGWRados with the
- * updated realm configuration.
+ * RGWRealmReloader responds to new period notifications by recreating RGWRados
+ * with the updated realm configuration.
*/
class RGWRealmReloader : public RGWRealmWatcher::Watcher {
public:
@@ -20,8 +20,7 @@ class RGWRealmReloader : public RGWRealmWatcher::Watcher {
* is required to ensure that they stop issuing requests on the old
* RGWRados instance, and restart with the updated configuration.
*
- * This abstraction avoids a depency on class RGWFrontend, which is only
- * defined in rgw_main.cc
+ * This abstraction avoids a dependency on class RGWFrontend.
*/
class Pauser {
public:
@@ -50,9 +49,9 @@ class RGWRealmReloader : public RGWRealmWatcher::Watcher {
Pauser *const frontends;
/// reload() takes a significant amount of time, so we don't want to run
- /// it in the handle_notify() thread. we choose a timer thread because we
- /// also want to add a delay (see rgw_realm_reconfigure_delay) so that we
- /// can batch up notifications within that window
+ /// it in the handle_notify() thread. we choose a timer thread instead of a
+ /// Finisher because it allows us to cancel events that were scheduled while
+ /// reload() is still running
SafeTimer timer;
Mutex mutex; //< protects access to timer and reload_scheduled
Cond cond; //< to signal reload() after an invalid realm config
diff --git a/src/rgw/rgw_replica_log.h b/src/rgw/rgw_replica_log.h
index 45cac52d68c..699600c0fe4 100644
--- a/src/rgw/rgw_replica_log.h
+++ b/src/rgw/rgw_replica_log.h
@@ -22,8 +22,6 @@
class RGWRados;
class CephContext;
-using namespace std;
-
#define META_REPLICA_LOG_OBJ_PREFIX "meta.replicalog."
#define DATA_REPLICA_LOG_OBJ_PREFIX "data.replicalog."
diff --git a/src/rgw/rgw_request.h b/src/rgw/rgw_request.h
index d9fc69bee1c..3c835f7b1c9 100644
--- a/src/rgw/rgw_request.h
+++ b/src/rgw/rgw_request.h
@@ -12,8 +12,11 @@
#if defined(WITH_RADOSGW_FCGI_FRONTEND)
#include "rgw_fcgi.h"
#endif
+
#include "common/QueueRing.h"
+#include <atomic>
+
struct RGWRequest
{
uint64_t id;
@@ -56,10 +59,10 @@ struct RGWLoadGenRequest : public RGWRequest {
string method;
string resource;
int content_length;
- atomic_t* fail_flag;
+ std::atomic<int64_t>* fail_flag = nullptr;
RGWLoadGenRequest(uint64_t req_id, const string& _m, const string& _r, int _cl,
- atomic_t *ff)
+ std::atomic<int64_t> *ff)
: RGWRequest(req_id), method(_m), resource(_r), content_length(_cl),
fail_flag(ff) {}
};
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index 175ceb9606b..140d434e737 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -1,6 +1,7 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
+
#include <errno.h>
#include <limits.h>
@@ -18,8 +19,6 @@
#include "rgw_rest_s3.h"
#include "rgw_swift_auth.h"
#include "rgw_cors_s3.h"
-#include "rgw_http_errors.h"
-#include "rgw_lib.h"
#include "rgw_client_io.h"
#include "rgw_resolve.h"
@@ -28,6 +27,53 @@
#define dout_subsys ceph_subsys_rgw
+/* Pairs a numeric HTTP status code with its reason phrase. */
+struct rgw_http_status_code {
+  int code;
+  const char *name;
+};
+
+/*
+ * HTTP status codes and the reason phrases that go with them.
+ * The list ends with a { 0, NULL } entry.
+ */
+const static struct rgw_http_status_code http_codes[] = {
+  { 100, "Continue" },
+  { 200, "OK" },
+  { 201, "Created" },
+  { 202, "Accepted" },
+  { 204, "No Content" },
+  { 205, "Reset Content" },
+  { 206, "Partial Content" },
+  { 207, "Multi-Status" },
+  { 208, "Already Reported" },
+  { 300, "Multiple Choices" },
+  { 301, "Moved Permanently" },
+  { 302, "Found" },
+  { 303, "See Other" },
+  { 304, "Not Modified" },
+  { 305, "Use Proxy" },
+  { 306, "Switch Proxy" },
+  { 307, "Temporary Redirect" },
+  { 308, "Permanent Redirect" },
+  { 400, "Bad Request" },
+  { 401, "Unauthorized" },
+  { 402, "Payment Required" },
+  { 403, "Forbidden" },
+  { 404, "Not Found" },
+  { 405, "Method Not Allowed" },
+  { 406, "Not Acceptable" },
+  { 407, "Proxy Authentication Required" },
+  { 408, "Request Timeout" },
+  { 409, "Conflict" },
+  { 410, "Gone" },
+  { 411, "Length Required" },
+  { 412, "Precondition Failed" },
+  { 413, "Request Entity Too Large" },
+  { 414, "Request-URI Too Long" },
+  { 415, "Unsupported Media Type" },
+  { 416, "Requested Range Not Satisfiable" },
+  { 417, "Expectation Failed" },
+  { 422, "Unprocessable Entity" },
+  { 500, "Internal Server Error" },
+  { 501, "Not Implemented" },
+  { 0, NULL },
+};
struct rgw_http_attr {
const char *rgw_attr;
@@ -309,45 +355,6 @@ void rgw_flush_formatter(struct req_state *s, Formatter *formatter)
}
}
-void set_req_state_err(struct rgw_err& err, /* out */
- int err_no, /* in */
- const int prot_flags) /* in */
-{
- const struct rgw_http_errors *r;
-
- if (err_no < 0)
- err_no = -err_no;
- err.ret = -err_no;
- if (prot_flags & RGW_REST_SWIFT) {
- r = search_err(err_no, RGW_HTTP_SWIFT_ERRORS,
- ARRAY_LEN(RGW_HTTP_SWIFT_ERRORS));
- if (r) {
- err.http_ret = r->http_ret;
- err.s3_code = r->s3_code;
- return;
- }
- }
-
- r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
- if (r) {
- err.http_ret = r->http_ret;
- err.s3_code = r->s3_code;
- return;
- }
- dout(0) << "WARNING: set_req_state_err err_no=" << err_no
- << " resorting to 500" << dendl;
-
- err.http_ret = 500;
- err.s3_code = "UnknownError";
-}
-
-void set_req_state_err(struct req_state * const s, const int err_no)
-{
- if (s) {
- set_req_state_err(s->err, err_no, s->prot_flags);
- }
-}
-
void dump_errno(int http_ret, string& out) {
stringstream ss;
@@ -658,7 +665,7 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type,
dump_trans_id(s);
- if ((!s->err.is_err()) &&
+ if ((!s->is_err()) &&
(s->bucket_info.owner != s->user->user_id) &&
(s->bucket_info.requester_pays)) {
dump_header(s, "x-amz-request-charged", "requester");
@@ -675,7 +682,7 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type,
/* do not send content type if content length is zero
and the content type was not set by the user */
if (force_content_type ||
- (!content_type && s->formatter->get_len() != 0) || s->err.is_err()){
+ (!content_type && s->formatter->get_len() != 0) || s->is_err()){
switch (s->format) {
case RGW_FORMAT_XML:
ctype = "application/xml";
@@ -694,24 +701,9 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type,
ctype.append("; charset=utf-8");
content_type = ctype.c_str();
}
- if (!force_no_error && s->err.is_err()) {
+ if (!force_no_error && s->is_err()) {
dump_start(s);
- if (s->format != RGW_FORMAT_HTML) {
- s->formatter->open_object_section("Error");
- }
- if (!s->err.s3_code.empty())
- s->formatter->dump_string("Code", s->err.s3_code);
- if (!s->err.message.empty())
- s->formatter->dump_string("Message", s->err.message);
- if (!s->bucket_name.empty()) // TODO: connect to expose_bucket
- s->formatter->dump_string("BucketName", s->bucket_name);
- if (!s->trans_id.empty()) // TODO: connect to expose_bucket or another toggle
- s->formatter->dump_string("RequestId", s->trans_id);
- s->formatter->dump_string("HostId", s->host_id);
- if (s->format != RGW_FORMAT_HTML) {
- s->formatter->close_section();
- }
- s->formatter->output_footer();
+ dump(s);
dump_content_length(s, s->formatter->get_len());
} else {
if (proposed_content_length == CHUNKED_TRANSFER_ENCODING) {
@@ -736,8 +728,8 @@ void end_header(struct req_state* s, RGWOp* op, const char *content_type,
rgw_flush_formatter_and_reset(s, s->formatter);
}
-void abort_early(struct req_state *s, RGWOp *op, int err_no,
- RGWHandler* handler)
+void abort_early(struct req_state *s, RGWOp* op, int err_no,
+ RGWHandler* handler)
{
string error_content("");
if (!s->formatter) {
@@ -764,12 +756,10 @@ void abort_early(struct req_state *s, RGWOp *op, int err_no,
// returned 0. If non-zero, we need to continue here.
if (err_no) {
// Watch out, we might have a custom error state already set!
- if (s->err.http_ret && s->err.http_ret != 200) {
- dump_errno(s);
- } else {
+ if (!s->err.http_ret || s->err.http_ret == 200) {
set_req_state_err(s, err_no);
- dump_errno(s);
}
+ dump_errno(s);
dump_bucket_from_state(s);
if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) {
string dest_uri;
@@ -1236,6 +1226,308 @@ int RGWPutObj_ObjStore::get_data(bufferlist& bl)
return len;
}
+
+/*
+ * Splits a header value of the form 'first; param1=foo; param2=bar'.
+ *
+ * The text before the first ';' is trimmed and stored in 'first'. Every
+ * following ';'-separated item is added to 'params': 'name=value' items are
+ * stored under the trimmed name with the quotes stripped from the value,
+ * items without '=' are stored with an empty value.
+ */
+void RGWPostObj_ObjStore::parse_boundary_params(const std::string& params_str,
+                                                std::string& first,
+                                                std::map<std::string,
+                                                         std::string>& params)
+{
+  const size_t first_end = params_str.find(';');
+
+  /* substr() with npos as the length yields the whole string */
+  first = rgw_trim_whitespace(params_str.substr(0, first_end));
+  if (first_end == std::string::npos) {
+    return;
+  }
+
+  for (size_t cur = first_end + 1; cur < params_str.size(); ) {
+    const size_t found = params_str.find(';', cur);
+    const size_t stop =
+      (found == std::string::npos) ? params_str.size() : found;
+
+    const std::string item = params_str.substr(cur, stop - cur);
+    const size_t eq = item.find('=');
+
+    if (eq == std::string::npos) {
+      params[rgw_trim_whitespace(item)] = "";
+    } else {
+      std::string key = rgw_trim_whitespace(item.substr(0, eq));
+      std::string value = rgw_trim_quotes(item.substr(eq + 1));
+      params[std::move(key)] = std::move(value);
+    }
+
+    cur = stop + 1;
+  }
+}
+
+/*
+ * Parses one 'Name: value; param=...' header line of a form part.
+ *
+ * field_name receives the text before the first ':'. Whatever follows the
+ * colon, if anything, is handed to parse_boundary_params() to fill
+ * field.val and field.params.
+ *
+ * Returns 0 on success, -EINVAL when the line contains no ':'.
+ */
+int RGWPostObj_ObjStore::parse_part_field(const std::string& line,
+                                          std::string& field_name, /* out */
+                                          post_part_field& field) /* out */
+{
+  const size_t colon = line.find(':');
+  if (colon == std::string::npos) {
+    return -EINVAL;
+  }
+
+  field_name = line.substr(0, colon);
+
+  /* only parse a value when something follows the colon */
+  const bool has_value = (colon + 1 < line.size());
+  if (has_value) {
+    parse_boundary_params(line.substr(colon + 1), field.val, field.params);
+  }
+
+  return 0;
+}
+
+/* Tells whether the two characters starting at s are "\r\n". */
+static bool is_crlf(const char *s)
+{
+  return s[0] == '\r' && s[1] == '\n';
+}
+
+/*
+ * Scans the first max_len bytes of bl (clamped to bl.length()) for str, the
+ * boundary, and, when check_crlf is set, for the end of a line.
+ *
+ * Returns:
+ *  - the offset of the boundary, with reached_boundary set to true and skip
+ *    set to the number of bytes to skip from that offset. When a CRLF sits
+ *    right in front of the boundary, the offset is moved back onto that CRLF
+ *    and skip grows by 2 so that the CRLF is swallowed too;
+ *  - when check_crlf is set and a CRLF is met first, the offset just past
+ *    that CRLF (reached_boundary stays false and skip stays 0);
+ *  - -EINVAL when str is shorter than 2 characters;
+ *  - -1 when bl is shorter than str or when nothing was found.
+ */
+static int index_of(ceph::bufferlist& bl,
+ uint64_t max_len,
+ const std::string& str,
+ const bool check_crlf,
+ bool& reached_boundary,
+ int& skip)
+{
+ reached_boundary = false;
+ skip = 0;
+
+ if (str.size() < 2) // we assume boundary is at least 2 chars (makes it easier with crlf checks)
+ return -EINVAL;
+
+ if (bl.length() < str.size())
+ return -1;
+
+ const char *buf = bl.c_str();
+ const char *s = str.c_str();
+
+ // never scan past the data that is actually buffered
+ if (max_len > bl.length())
+ max_len = bl.length();
+
+ for (uint64_t i = 0; i < max_len; i++, buf++) {
+ // buf - 1 and buf are the bytes at offsets i - 1 and i
+ if (check_crlf &&
+ i >= 1 &&
+ is_crlf(buf - 1)) {
+ return i + 1; // skip the crlf
+ }
+ // only compare where a whole boundary still fits into the scan window;
+ // the two-character check is a cheap filter in front of strncmp()
+ if ((i < max_len - str.size() + 1) &&
+ (buf[0] == s[0] && buf[1] == s[1]) &&
+ (strncmp(buf, s, str.size()) == 0)) {
+ reached_boundary = true;
+ skip = str.size();
+
+ /* oh, great, now we need to swallow the preceding crlf
+ * if exists
+ */
+ if ((i >= 2) &&
+ is_crlf(buf - 2)) {
+ i -= 2;
+ skip += 2;
+ }
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+/*
+ * Hands out the next chunk of the request body in bl.
+ *
+ * At most max bytes are returned; the chunk ends earlier when the multipart
+ * boundary is found or, if check_crlf is set, at the end of the current
+ * line. Bytes that were received but not handed out stay in in_data for the
+ * next call.
+ *
+ * reached_boundary is set when the chunk was terminated by the boundary; the
+ * boundary itself and a CRLF following it are then consumed as well.
+ * done is set when that boundary is followed by "--", i.e. it was the
+ * closing one.
+ *
+ * Returns 0 on success or the negative value reported by recv_body().
+ */
+int RGWPostObj_ObjStore::read_with_boundary(ceph::bufferlist& bl,
+                                            uint64_t max,
+                                            const bool check_crlf,
+                                            bool& reached_boundary,
+                                            bool& done)
+{
+  /* look far enough ahead to cover a CRLF plus boundary that starts right
+   * at the end of the requested window */
+  uint64_t cl = max + 2 + boundary.size();
+
+  if (max > in_data.length()) {
+    uint64_t need_to_read = cl - in_data.length();
+
+    bufferptr bp(need_to_read);
+
+    const auto read_len = recv_body(s, bp.c_str(), need_to_read);
+    if (read_len < 0) {
+      return read_len;
+    }
+    in_data.append(bp, 0, read_len);
+  }
+
+  done = false;
+  int skip;
+  const int index = index_of(in_data, cl, boundary, check_crlf,
+                             reached_boundary, skip);
+  if (index >= 0) {
+    max = index;
+  }
+
+  if (max > in_data.length()) {
+    max = in_data.length();
+  }
+
+  bl.substr_of(in_data, 0, max);
+
+  ceph::bufferlist new_read_data;
+
+  /*
+   * now we need to skip boundary for next time, also skip any crlf, or
+   * check to see if it's the last final boundary (marked with "--" at the end
+   */
+  if (reached_boundary) {
+    int left = in_data.length() - max;
+    if (left < skip + 2) {
+      int need = skip + 2 - left;
+      bufferptr boundary_bp(need);
+      const int r = recv_body(s, boundary_bp.c_str(), need);
+      if (r < 0) {
+        return r;
+      }
+      /* append only what was really received: on a short read the tail of
+       * boundary_bp was never written to */
+      in_data.append(boundary_bp, 0, r);
+    }
+    max += skip; // skip boundary for next time
+    if (in_data.length() >= max + 2) {
+      const char *data = in_data.c_str();
+      if (is_crlf(data + max)) {
+        max += 2;
+      } else {
+        if (*(data + max) == '-' &&
+            *(data + max + 1) == '-') {
+          done = true;
+          max += 2;
+        }
+      }
+    }
+  }
+
+  /* keep whatever was not consumed for the next call */
+  new_read_data.substr_of(in_data, max, in_data.length() - max);
+  in_data = new_read_data;
+
+  return 0;
+}
+
+/*
+ * Line-oriented variant of read_with_boundary(): the chunk also ends at the
+ * first CRLF, not only at the boundary.
+ */
+int RGWPostObj_ObjStore::read_line(ceph::bufferlist& bl,
+                                   const uint64_t max,
+                                   bool& reached_boundary,
+                                   bool& done)
+{
+  const bool stop_at_crlf = true;
+  return read_with_boundary(bl, max, stop_at_crlf, reached_boundary, done);
+}
+
+/*
+ * Payload variant of read_with_boundary(): CRLFs are treated as ordinary
+ * data, only the boundary ends the chunk early.
+ */
+int RGWPostObj_ObjStore::read_data(ceph::bufferlist& bl,
+                                   const uint64_t max,
+                                   bool& reached_boundary,
+                                   bool& done)
+{
+  const bool stop_at_crlf = false;
+  return read_with_boundary(bl, max, stop_at_crlf, reached_boundary, done);
+}
+
+
+/*
+ * Reads the header lines of the next form part into part.
+ *
+ * Every non-empty line is parsed with parse_part_field() and stored in
+ * part->fields under its field name; the "name" parameter of a
+ * Content-Disposition field becomes part->name. Reading stops at the first
+ * empty line or once the boundary has been reached.
+ *
+ * done is set by read_line() when the closing boundary was met.
+ *
+ * Returns 0 on success, or the negative error of read_line() or
+ * parse_part_field().
+ */
+int RGWPostObj_ObjStore::read_form_part_header(struct post_form_part* const part,
+                                               bool& done)
+{
+  bufferlist bl;
+  bool reached_boundary;
+  uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+  int r = read_line(bl, chunk_size, reached_boundary, done);
+  if (r < 0) {
+    return r;
+  }
+
+  if (done) {
+    return 0;
+  }
+
+  if (reached_boundary) { // skip the first boundary
+    r = read_line(bl, chunk_size, reached_boundary, done);
+    if (r < 0) {
+      return r;
+    } else if (done) {
+      return 0;
+    }
+  }
+
+  while (true) {
+    /*
+     * iterate through fields
+     */
+    std::string line = rgw_trim_whitespace(string(bl.c_str(), bl.length()));
+
+    if (line.empty()) {
+      break;
+    }
+
+    struct post_part_field field;
+
+    string field_name;
+    r = parse_part_field(line, field_name, field);
+    if (r < 0) {
+      return r;
+    }
+
+    part->fields[field_name] = field;
+
+    if (stringcasecmp(field_name, "Content-Disposition") == 0) {
+      part->name = field.params["name"];
+    }
+
+    if (reached_boundary) {
+      break;
+    }
+
+    r = read_line(bl, chunk_size, reached_boundary, done);
+    if (r < 0) {
+      /* do not go on parsing after a failed read */
+      return r;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Looks up the part called name and, when present, stores its data in *val
+ * as a whitespace-trimmed string.
+ *
+ * Returns false (leaving *val untouched) when there is no such part.
+ */
+bool RGWPostObj_ObjStore::part_str(parts_collection_t& parts,
+                                   const std::string& name,
+                                   std::string* val)
+{
+  const auto found = parts.find(name);
+  if (found == std::end(parts)) {
+    return false;
+  }
+
+  ceph::bufferlist& payload = found->second.data;
+  *val = rgw_trim_whitespace(std::string(payload.c_str(), payload.length()));
+  return true;
+}
+
+/*
+ * Returns the trimmed data of the part called name, or the trimmed def_val
+ * when there is no such part.
+ */
+std::string RGWPostObj_ObjStore::get_part_str(parts_collection_t& parts,
+                                              const std::string& name,
+                                              const std::string& def_val)
+{
+  std::string found;
+
+  if (!part_str(parts, name, &found)) {
+    return rgw_trim_whitespace(def_val);
+  }
+
+  return found;
+}
+
+/*
+ * Copies the data of the part called name into *pbl.
+ *
+ * Returns false (leaving *pbl untouched) when there is no such part.
+ */
+bool RGWPostObj_ObjStore::part_bl(parts_collection_t& parts,
+                                  const std::string& name,
+                                  ceph::bufferlist* pbl)
+{
+  const auto pos = parts.find(name);
+  const bool present = (pos != std::end(parts));
+
+  if (present) {
+    *pbl = pos->second.data;
+  }
+
+  return present;
+}
+
int RGWPostObj_ObjStore::verify_params()
{
/* check that we have enough memory to store the object
@@ -1252,6 +1544,51 @@ int RGWPostObj_ObjStore::verify_params()
return 0;
}
+/*
+ * Validates the request's Content-Type and derives the multipart boundary
+ * from it.
+ *
+ * Returns 0 on success. Returns -EINVAL, with err_msg set, when the content
+ * type is not multipart/form-data or carries no "boundary" parameter.
+ */
+int RGWPostObj_ObjStore::get_params()
+{
+  if (s->expect_cont) {
+    /* The parameters of a POST live in the request body, so the client has
+     * to be told to continue before they can be looked at. This differs
+     * from the usual request flow. */
+    dump_continue(s);
+    s->expect_cont = false;
+  }
+
+  const std::string raw_ctype = s->info.env->get("CONTENT_TYPE", "");
+  std::string media_type;
+  std::map<std::string, std::string> ctype_params;
+  parse_boundary_params(raw_ctype, media_type, ctype_params);
+
+  if (media_type != "multipart/form-data") {
+    err_msg = "Request Content-Type is not multipart/form-data";
+    return -EINVAL;
+  }
+
+  if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
+    ldout(s->cct, 20) << "request content_type_str="
+                      << raw_ctype << dendl;
+    ldout(s->cct, 20) << "request content_type params:" << dendl;
+
+    for (const auto& kv : ctype_params) {
+      ldout(s->cct, 20) << " " << kv.first << " -> " << kv.second
+                        << dendl;
+    }
+  }
+
+  const auto boundary_param = ctype_params.find("boundary");
+  if (boundary_param == std::end(ctype_params)) {
+    err_msg = "Missing multipart boundary specification";
+    return -EINVAL;
+  }
+
+  /* The boundary found in the body is the parameter prefixed with "--". */
+  boundary = "--" + boundary_param->second;
+
+  return 0;
+}
+
+
int RGWPutACLs_ObjStore::get_params()
{
const auto max_size = s->cct->_conf->rgw_max_put_param_size;
diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h
index c618f532f3c..41597a2d476 100644
--- a/src/rgw/rgw_rest.h
+++ b/src/rgw/rgw_rest.h
@@ -218,6 +218,67 @@ public:
class RGWPostObj_ObjStore : public RGWPostObj
{
+ std::string boundary;
+
+public:
+ /* One header field of a form part, as filled in by parse_part_field():
+  * the leading value plus the ';'-separated parameters following it. */
+ struct post_part_field {
+ std::string val;
+ std::map<std::string, std::string> params;
+ };
+
+ /* One part of the submitted form: its name (taken from the "name"
+  * parameter of the Content-Disposition field), its header fields keyed
+  * by field name, and its data. */
+ struct post_form_part {
+ std::string name;
+ std::map<std::string, post_part_field, ltstr_nocase> fields;
+ ceph::bufferlist data;
+ };
+
+protected:
+ using parts_collection_t = \
+ std::map<std::string, post_form_part, const ltstr_nocase>;
+
+ std::string err_msg;
+ ceph::bufferlist in_data;
+
+ int read_with_boundary(ceph::bufferlist& bl,
+ uint64_t max,
+ bool check_eol,
+ bool& reached_boundary,
+ bool& done);
+
+ int read_line(ceph::bufferlist& bl,
+ uint64_t max,
+ bool& reached_boundary,
+ bool& done);
+
+ int read_data(ceph::bufferlist& bl,
+ uint64_t max,
+ bool& reached_boundary,
+ bool& done);
+
+ int read_form_part_header(struct post_form_part *part, bool& done);
+
+ int get_params() override;
+
+ static int parse_part_field(const std::string& line,
+ std::string& field_name, /* out */
+ post_part_field& field); /* out */
+
+ static void parse_boundary_params(const std::string& params_str,
+ std::string& first,
+ std::map<std::string, std::string>& params);
+
+ static bool part_str(parts_collection_t& parts,
+ const std::string& name,
+ std::string *val);
+
+ static std::string get_part_str(parts_collection_t& parts,
+ const std::string& name,
+ const std::string& def_val = std::string());
+
+ static bool part_bl(parts_collection_t& parts,
+ const std::string& name,
+ ceph::bufferlist *pbl);
+
public:
RGWPostObj_ObjStore() {}
~RGWPostObj_ObjStore() override {}
@@ -225,6 +286,7 @@ public:
int verify_params() override;
};
+
class RGWPutMetadataAccount_ObjStore : public RGWPutMetadataAccount
{
public:
@@ -564,8 +626,6 @@ public:
static constexpr int64_t NO_CONTENT_LENGTH = -1;
static constexpr int64_t CHUNKED_TRANSFER_ENCODING = -2;
-extern void set_req_state_err(struct rgw_err &err, int err_no, int prot_flags);
-extern void set_req_state_err(struct req_state *s, int err_no);
extern void dump_errno(int http_ret, string& out);
extern void dump_errno(const struct rgw_err &err, string& out);
extern void dump_errno(struct req_state *s);
diff --git a/src/rgw/rgw_rest_conn.cc b/src/rgw/rgw_rest_conn.cc
index 8699624002b..71a11554d2c 100644
--- a/src/rgw/rgw_rest_conn.cc
+++ b/src/rgw/rgw_rest_conn.cc
@@ -19,6 +19,27 @@ RGWRESTConn::RGWRESTConn(CephContext *_cct, RGWRados *store,
}
}
+/*
+ * Move constructor. It is written by hand because the std::atomic counter
+ * can be neither copied nor moved: its current value is loaded from other
+ * instead. The remaining members are moved, cct is copied.
+ */
+RGWRESTConn::RGWRESTConn(RGWRESTConn&& other)
+ : cct(other.cct),
+ endpoints(std::move(other.endpoints)),
+ key(std::move(other.key)),
+ self_zone_group(std::move(other.self_zone_group)),
+ remote_id(std::move(other.remote_id)),
+ counter(other.counter.load())
+{
+}
+
+/*
+ * Move assignment. It is written by hand because the std::atomic counter
+ * can be neither copied nor moved: its current value is loaded from other
+ * and stored here. The remaining members are moved, cct is copied.
+ */
+RGWRESTConn& RGWRESTConn::operator=(RGWRESTConn&& other)
+{
+  /* moving a string or container onto itself leaves it in an unspecified
+   * state, so self-assignment has to be a no-op */
+  if (this == &other) {
+    return *this;
+  }
+
+  cct = other.cct;
+  endpoints = std::move(other.endpoints);
+  key = std::move(other.key);
+  self_zone_group = std::move(other.self_zone_group);
+  remote_id = std::move(other.remote_id);
+  counter = other.counter.load();
+  return *this;
+}
+
int RGWRESTConn::get_url(string& endpoint)
{
if (endpoints.empty()) {
@@ -26,7 +47,7 @@ int RGWRESTConn::get_url(string& endpoint)
return -EIO;
}
- int i = counter.inc();
+ int i = ++counter;
endpoint = endpoints[i % endpoints.size()];
return 0;
@@ -40,7 +61,7 @@ string RGWRESTConn::get_url()
return endpoint;
}
- int i = counter.inc();
+ int i = ++counter;
endpoint = endpoints[i % endpoints.size()];
return endpoint;
@@ -182,7 +203,13 @@ int RGWRESTConn::get_obj(const rgw_user& uid, req_info *info /* optional */, rgw
set_header(mod_pg_ver, extra_headers, "HTTP_DEST_PG_VER");
}
- return (*req)->get_obj(key, extra_headers, obj);
+ int r = (*req)->get_obj(key, extra_headers, obj);
+ if (r < 0) {
+ delete *req;
+ *req = nullptr;
+ }
+
+ return r;
}
int RGWRESTConn::complete_request(RGWRESTStreamRWRequest *req, string& etag, real_time *mtime,
diff --git a/src/rgw/rgw_rest_conn.h b/src/rgw/rgw_rest_conn.h
index 674387ffa05..bec829d6939 100644
--- a/src/rgw/rgw_rest_conn.h
+++ b/src/rgw/rgw_rest_conn.h
@@ -9,6 +9,7 @@
#include "common/ceph_json.h"
#include "common/RefCountedObj.h"
+#include <atomic>
class CephContext;
class RGWRados;
@@ -55,11 +56,15 @@ class RGWRESTConn
RGWAccessKey key;
string self_zone_group;
string remote_id;
- atomic_t counter;
+ std::atomic<int64_t> counter = { 0 };
public:
RGWRESTConn(CephContext *_cct, RGWRados *store, const string& _remote_id, const list<string>& endpoints);
+ // custom move needed for atomic
+ RGWRESTConn(RGWRESTConn&& other);
+ RGWRESTConn& operator=(RGWRESTConn&& other);
+
int get_url(string& endpoint);
string get_url();
const string& get_self_zonegroup() {
diff --git a/src/rgw/rgw_rest_log.cc b/src/rgw/rgw_rest_log.cc
index 973af3c8891..9220917c99a 100644
--- a/src/rgw/rgw_rest_log.cc
+++ b/src/rgw/rgw_rest_log.cc
@@ -20,6 +20,7 @@
#include "rgw_client_io.h"
#include "rgw_sync.h"
#include "rgw_data_sync.h"
+#include "rgw_common.h"
#include "common/errno.h"
#include "include/assert.h"
@@ -863,8 +864,7 @@ void RGWOp_MDLog_Status::execute()
http_ret = -ENOENT;
return;
}
- http_ret = sync->read_sync_status();
- status = sync->get_sync_status();
+ http_ret = sync->read_sync_status(&status);
}
void RGWOp_MDLog_Status::send_response()
diff --git a/src/rgw/rgw_rest_realm.cc b/src/rgw/rgw_rest_realm.cc
index d5ef848b805..0f5abcdfe05 100644
--- a/src/rgw/rgw_rest_realm.cc
+++ b/src/rgw/rgw_rest_realm.cc
@@ -26,9 +26,7 @@ class RGWOp_Period_Base : public RGWRESTOp {
// reply with the period object on success
void RGWOp_Period_Base::send_response()
{
- s->err.message = error_stream.str();
-
- set_req_state_err(s, http_ret);
+ set_req_state_err(s, http_ret, error_stream.str());
dump_errno(s);
if (http_ret < 0) {
diff --git a/src/rgw/rgw_rest_role.cc b/src/rgw/rgw_rest_role.cc
index 386504398fe..40473fba67f 100644
--- a/src/rgw/rgw_rest_role.cc
+++ b/src/rgw/rgw_rest_role.cc
@@ -1,3 +1,5 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#include <errno.h>
#include "common/errno.h"
@@ -15,8 +17,6 @@
#define dout_subsys ceph_subsys_rgw
-using namespace std;
-
void RGWRestRole::send_response()
{
if (op_ret) {
@@ -77,9 +77,7 @@ void RGWCreateRole::execute()
if (op_ret < 0) {
return;
}
- string uid;
- s->user->user_id.to_str(uid);
- RGWRole role(s->cct, store, role_name, role_path, trust_policy, uid);
+ RGWRole role(s->cct, store, role_name, role_path, trust_policy, s->user->user_id.tenant);
op_ret = role.create(true);
if (op_ret == -EEXIST) {
@@ -111,7 +109,7 @@ void RGWDeleteRole::execute()
if (op_ret < 0) {
return;
}
- RGWRole role(s->cct, store, role_name);
+ RGWRole role(s->cct, store, role_name, s->user->user_id.tenant);
op_ret = role.delete_obj();
if (op_ret == -ENOENT) {
@@ -137,7 +135,7 @@ void RGWGetRole::execute()
if (op_ret < 0) {
return;
}
- RGWRole role(s->cct, store, role_name);
+ RGWRole role(s->cct, store, role_name, s->user->user_id.tenant);
op_ret = role.get();
if (op_ret == -ENOENT) {
@@ -175,7 +173,7 @@ void RGWModifyRole::execute()
if (op_ret < 0) {
return;
}
- RGWRole role(s->cct, store, role_name);
+ RGWRole role(s->cct, store, role_name, s->user->user_id.tenant);
op_ret = role.get();
if (op_ret == -ENOENT) {
op_ret = -ERR_NO_ROLE_FOUND;
@@ -201,7 +199,7 @@ void RGWListRoles::execute()
return;
}
vector<RGWRole> result;
- op_ret = RGWRole::get_roles_by_path_prefix(store, s->cct, path_prefix, result);
+ op_ret = RGWRole::get_roles_by_path_prefix(store, s->cct, path_prefix, s->user->user_id.tenant, result);
if (op_ret == 0) {
s->formatter->open_array_section("Roles");
@@ -240,7 +238,7 @@ void RGWPutRolePolicy::execute()
return;
}
- RGWRole role(s->cct, store, role_name);
+ RGWRole role(s->cct, store, role_name, s->user->user_id.tenant);
op_ret = role.get();
if (op_ret == 0) {
role.set_perm_policy(policy_name, perm_policy);
@@ -267,7 +265,7 @@ void RGWGetRolePolicy::execute()
return;
}
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, s->user->user_id.tenant);
op_ret = role.get();
if (op_ret == -ENOENT) {
@@ -306,7 +304,7 @@ void RGWListRolePolicies::execute()
return;
}
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, s->user->user_id.tenant);
op_ret = role.get();
if (op_ret == -ENOENT) {
@@ -342,7 +340,7 @@ void RGWDeleteRolePolicy::execute()
return;
}
- RGWRole role(g_ceph_context, store, role_name);
+ RGWRole role(g_ceph_context, store, role_name, s->user->user_id.tenant);
op_ret = role.get();
if (op_ret == -ENOENT) {
diff --git a/src/rgw/rgw_rest_role.h b/src/rgw/rgw_rest_role.h
index de3ec97a637..7a99dbe45c6 100644
--- a/src/rgw/rgw_rest_role.h
+++ b/src/rgw/rgw_rest_role.h
@@ -1,3 +1,5 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
#ifndef CEPH_RGW_REST_ROLE_H
#define CEPH_RGW_REST_ROLE_H
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index 1c9e4d751b0..96285e258ff 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -65,14 +65,13 @@ void dump_bucket(struct req_state *s, RGWBucketEnt& obj)
s->formatter->close_section();
}
-void rgw_get_errno_s3(rgw_http_errors *e , int err_no)
+void rgw_get_errno_s3(rgw_http_error *e , int err_no)
{
- const struct rgw_http_errors *r;
- r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
+ rgw_http_errors::const_iterator r = rgw_http_s3_errors.find(err_no);
- if (r) {
- e->http_ret = r->http_ret;
- e->s3_code = r->s3_code;
+ if (r != rgw_http_s3_errors.end()) {
+ e->http_ret = r->second.first;
+ e->s3_code = r->second.second;
} else {
e->http_ret = 500;
e->s3_code = "UnknownError";
@@ -173,7 +172,7 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs,
dump_errno(s, custom_http_ret);
} else {
set_req_state_err(s, (partial_content && !op_ret) ? STATUS_PARTIAL_CONTENT
- : op_ret);
+ : op_ret);
dump_errno(s);
}
@@ -1406,7 +1405,6 @@ static inline int get_obj_attrs(RGWRados *store, struct req_state *s, rgw_obj& o
RGWRados::Object::Read read_op(&op_target);
read_op.params.attrs = &attrs;
- read_op.params.perr = &s->err;
return read_op.prepare();
}
@@ -1494,265 +1492,6 @@ int RGWPutObj_ObjStore_S3::get_encrypt_filter(
}
return res;
}
-/*
- * parses params in the format: 'first; param1=foo; param2=bar'
- */
-static void parse_params(const string& params_str, string& first,
- map<string, string>& params)
-{
- size_t pos = params_str.find(';');
- if (pos == string::npos) {
- first = rgw_trim_whitespace(params_str);
- return;
- }
-
- first = rgw_trim_whitespace(params_str.substr(0, pos));
-
- pos++;
-
- while (pos < params_str.size()) {
- size_t end = params_str.find(';', pos);
- if (end == string::npos)
- end = params_str.size();
-
- string param = params_str.substr(pos, end - pos);
-
- size_t eqpos = param.find('=');
- if (eqpos != string::npos) {
- params[rgw_trim_whitespace(param.substr(0, eqpos))] =
- rgw_trim_quotes(param.substr(eqpos + 1));
- } else {
- params[rgw_trim_whitespace(param)] = "";
- }
-
- pos = end + 1;
- }
-}
-
-static int parse_part_field(const string& line, string& field_name,
- struct post_part_field& field)
-{
- size_t pos = line.find(':');
- if (pos == string::npos)
- return -EINVAL;
-
- field_name = line.substr(0, pos);
- if (pos >= line.size() - 1)
- return 0;
-
- parse_params(line.substr(pos + 1), field.val, field.params);
-
- return 0;
-}
-
-bool is_crlf(const char *s)
-{
- return (*s == '\r' && *(s + 1) == '\n');
-}
-
-/*
- * find the index of the boundary, if exists, or optionally the next end of line
- * also returns how many bytes to skip
- */
-static int index_of(bufferlist& bl, int max_len, const string& str,
- bool check_crlf,
- bool *reached_boundary, int *skip)
-{
- *reached_boundary = false;
- *skip = 0;
-
- if (str.size() < 2) // we assume boundary is at least 2 chars (makes it easier with crlf checks)
- return -EINVAL;
-
- if (bl.length() < str.size())
- return -1;
-
- const char *buf = bl.c_str();
- const char *s = str.c_str();
-
- if (max_len > (int)bl.length())
- max_len = bl.length();
-
- int i;
- for (i = 0; i < max_len; i++, buf++) {
- if (check_crlf &&
- i >= 1 &&
- is_crlf(buf - 1)) {
- return i + 1; // skip the crlf
- }
- if ((i < max_len - (int)str.size() + 1) &&
- (buf[0] == s[0] && buf[1] == s[1]) &&
- (strncmp(buf, s, str.size()) == 0)) {
- *reached_boundary = true;
- *skip = str.size();
-
- /* oh, great, now we need to swallow the preceding crlf
- * if exists
- */
- if ((i >= 2) &&
- is_crlf(buf - 2)) {
- i -= 2;
- *skip += 2;
- }
- return i;
- }
- }
-
- return -1;
-}
-
-int RGWPostObj_ObjStore_S3::read_with_boundary(bufferlist& bl, uint64_t max,
- bool check_crlf,
- bool *reached_boundary,
- bool *done)
-{
- uint64_t cl = max + 2 + boundary.size();
-
- if (max > in_data.length()) {
- uint64_t need_to_read = cl - in_data.length();
-
- bufferptr bp(need_to_read);
-
- const auto read_len = recv_body(s, bp.c_str(), need_to_read);
- in_data.append(bp, 0, read_len);
- }
-
- *done = false;
- int skip;
- int index = index_of(in_data, cl, boundary, check_crlf, reached_boundary,
- &skip);
- if (index >= 0)
- max = index;
-
- if (max > in_data.length())
- max = in_data.length();
-
- bl.substr_of(in_data, 0, max);
-
- bufferlist new_read_data;
-
- /*
- * now we need to skip boundary for next time, also skip any crlf, or
- * check to see if it's the last final boundary (marked with "--" at the end
- */
- if (*reached_boundary) {
- int left = in_data.length() - max;
- if (left < skip + 2) {
- int need = skip + 2 - left;
- bufferptr boundary_bp(need);
- recv_body(s, boundary_bp.c_str(), need);
- in_data.append(boundary_bp);
- }
- max += skip; // skip boundary for next time
- if (in_data.length() >= max + 2) {
- const char *data = in_data.c_str();
- if (is_crlf(data + max)) {
- max += 2;
- } else {
- if (*(data + max) == '-' &&
- *(data + max + 1) == '-') {
- *done = true;
- max += 2;
- }
- }
- }
- }
-
- new_read_data.substr_of(in_data, max, in_data.length() - max);
- in_data = new_read_data;
-
- return 0;
-}
-
-int RGWPostObj_ObjStore_S3::read_line(bufferlist& bl, uint64_t max,
- bool *reached_boundary, bool *done)
-{
- return read_with_boundary(bl, max, true, reached_boundary, done);
-}
-
-int RGWPostObj_ObjStore_S3::read_data(bufferlist& bl, uint64_t max,
- bool *reached_boundary, bool *done)
-{
- return read_with_boundary(bl, max, false, reached_boundary, done);
-}
-
-
-int RGWPostObj_ObjStore_S3::read_form_part_header(struct post_form_part *part,
- bool *done)
-{
- bufferlist bl;
- bool reached_boundary;
- uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
- int r = read_line(bl, chunk_size, &reached_boundary, done);
- if (r < 0)
- return r;
-
- if (*done) {
- return 0;
- }
-
- if (reached_boundary) { // skip the first boundary
- r = read_line(bl, chunk_size, &reached_boundary, done);
- if (r < 0)
- return r;
- if (*done)
- return 0;
- }
-
- while (true) {
- /*
- * iterate through fields
- */
- string line = rgw_trim_whitespace(string(bl.c_str(), bl.length()));
-
- if (line.empty())
- break;
-
- struct post_part_field field;
-
- string field_name;
- r = parse_part_field(line, field_name, field);
- if (r < 0)
- return r;
-
- part->fields[field_name] = field;
-
- if (stringcasecmp(field_name, "Content-Disposition") == 0) {
- part->name = field.params["name"];
- }
-
- if (reached_boundary)
- break;
-
- r = read_line(bl, chunk_size, &reached_boundary, done);
- }
-
- return 0;
-}
-
-bool RGWPostObj_ObjStore_S3::part_str(const string& name, string *val)
-{
- map<string, struct post_form_part, ltstr_nocase>::iterator iter
- = parts.find(name);
- if (iter == parts.end())
- return false;
-
- bufferlist& data = iter->second.data;
- string str = string(data.c_str(), data.length());
- *val = rgw_trim_whitespace(str);
- return true;
-}
-
-bool RGWPostObj_ObjStore_S3::part_bl(const string& name, bufferlist *pbl)
-{
- map<string, struct post_form_part, ltstr_nocase>::iterator iter =
- parts.find(name);
- if (iter == parts.end())
- return false;
-
- *pbl = iter->second.data;
- return true;
-}
void RGWPostObj_ObjStore_S3::rebuild_key(string& key)
{
@@ -1768,74 +1507,47 @@ void RGWPostObj_ObjStore_S3::rebuild_key(string& key)
key = new_key;
}
-int RGWPostObj_ObjStore_S3::get_params()
+std::string RGWPostObj_ObjStore_S3::get_current_filename() const
{
- // get the part boundary
- string req_content_type_str = s->info.env->get("CONTENT_TYPE", "");
- string req_content_type;
- map<string, string> params;
-
- if (s->expect_cont) {
- /* ok, here it really gets ugly. With POST, the params are embedded in the
- * request body, so we need to continue before being able to actually look
- * at them. This diverts from the usual request flow.
- */
- dump_continue(s);
- s->expect_cont = false;
- }
-
- parse_params(req_content_type_str, req_content_type, params);
+ return s->object.name;
+}
- if (req_content_type.compare("multipart/form-data") != 0) {
- err_msg = "Request Content-Type is not multipart/form-data";
- return -EINVAL;
- }
+std::string RGWPostObj_ObjStore_S3::get_current_content_type() const
+{ /* The S3 variant simply reports the `content_type` member. */
+ return content_type;
+}
- if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
- ldout(s->cct, 20) << "request content_type_str="
- << req_content_type_str << dendl;
- ldout(s->cct, 20) << "request content_type params:" << dendl;
- map<string, string>::iterator iter;
- for (iter = params.begin(); iter != params.end(); ++iter) {
- ldout(s->cct, 20) << " " << iter->first << " -> " << iter->second
- << dendl;
- }
+int RGWPostObj_ObjStore_S3::get_params()
+{
+ op_ret = RGWPostObj_ObjStore::get_params();
+ if (op_ret < 0) {
+ return op_ret;
}
ldout(s->cct, 20) << "adding bucket to policy env: " << s->bucket.name
<< dendl;
env.add_var("bucket", s->bucket.name);
- map<string, string>::iterator iter = params.find("boundary");
- if (iter == params.end()) {
- err_msg = "Missing multipart boundary specification";
- return -EINVAL;
- }
-
- // create the boundary
- boundary = "--";
- boundary.append(iter->second);
-
bool done;
do {
struct post_form_part part;
- int r = read_form_part_header(&part, &done);
+ int r = read_form_part_header(&part, done);
if (r < 0)
return r;
if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
- map<string, struct post_part_field, ltstr_nocase>::iterator piter;
- for (piter = part.fields.begin(); piter != part.fields.end(); ++piter) {
- ldout(s->cct, 20) << "read part header: name=" << part.name
- << " content_type=" << part.content_type << dendl;
- ldout(s->cct, 20) << "name=" << piter->first << dendl;
- ldout(s->cct, 20) << "val=" << piter->second.val << dendl;
- ldout(s->cct, 20) << "params:" << dendl;
- map<string, string>& params = piter->second.params;
- for (iter = params.begin(); iter != params.end(); ++iter) {
- ldout(s->cct, 20) << " " << iter->first << " -> " << iter->second
- << dendl;
- }
+ ldout(s->cct, 20) << "read part header -- part.name="
+ << part.name << dendl;
+
+ for (const auto& pair : part.fields) {
+ ldout(s->cct, 20) << "field.name=" << pair.first << dendl;
+ ldout(s->cct, 20) << "field.val=" << pair.second.val << dendl;
+ ldout(s->cct, 20) << "field.params:" << dendl;
+
+ for (const auto& param_pair : pair.second.params) {
+ ldout(s->cct, 20) << " " << param_pair.first
+ << " -> " << param_pair.second << dendl;
+ }
}
}
@@ -1851,13 +1563,12 @@ int RGWPostObj_ObjStore_S3::get_params()
filename = iter->second;
}
parts[part.name] = part;
- data_pending = true;
break;
}
bool boundary;
uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
- r = read_data(part.data, chunk_size, &boundary, &done);
+ r = read_data(part.data, chunk_size, boundary, done);
if (!boundary) {
err_msg = "Couldn't find boundary";
return -EINVAL;
@@ -1868,7 +1579,7 @@ int RGWPostObj_ObjStore_S3::get_params()
} while (!done);
string object_str;
- if (!part_str("key", &object_str)) {
+ if (!part_str(parts, "key", &object_str)) {
err_msg = "Key not specified";
return -EINVAL;
}
@@ -1884,7 +1595,7 @@ int RGWPostObj_ObjStore_S3::get_params()
env.add_var("key", s->object.name);
- part_str("Content-Type", &content_type);
+ part_str(parts, "Content-Type", &content_type);
env.add_var("Content-Type", content_type);
map<string, struct post_form_part, ltstr_nocase>::iterator piter =
@@ -1935,16 +1646,16 @@ int RGWPostObj_ObjStore_S3::get_params()
int RGWPostObj_ObjStore_S3::get_policy()
{
- if (part_bl("policy", &s->auth.s3_postobj_creds.encoded_policy)) {
-
+ if (part_bl(parts, "policy", &s->auth.s3_postobj_creds.encoded_policy)) {
// check that the signature matches the encoded policy
- if (! part_str("AWSAccessKeyId", &s->auth.s3_postobj_creds.access_key)) {
+ if (!part_str(parts, "AWSAccessKeyId",
+ &s->auth.s3_postobj_creds.access_key)) {
ldout(s->cct, 0) << "No S3 access key found!" << dendl;
err_msg = "Missing access key";
return -EINVAL;
}
- string received_signature_str;
- if (! part_str("signature", &s->auth.s3_postobj_creds.signature)) {
+
+ if (!part_str(parts, "signature", &s->auth.s3_postobj_creds.signature)) {
ldout(s->cct, 0) << "No signature found!" << dendl;
err_msg = "Missing signature";
return -EINVAL;
@@ -2020,7 +1731,7 @@ int RGWPostObj_ObjStore_S3::get_policy()
}
string canned_acl;
- part_str("acl", &canned_acl);
+ part_str(parts, "acl", &canned_acl);
RGWAccessControlPolicy_S3 s3policy(s->cct);
ldout(s->cct, 20) << "canned_acl=" << canned_acl << dendl;
@@ -2039,44 +1750,47 @@ int RGWPostObj_ObjStore_S3::complete_get_params()
bool done;
do {
struct post_form_part part;
- int r = read_form_part_header(&part, &done);
- if (r < 0)
+ int r = read_form_part_header(&part, done);
+ if (r < 0) {
return r;
+ }
- bufferlist part_data;
+ ceph::bufferlist part_data;
bool boundary;
uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
- r = read_data(part.data, chunk_size, &boundary, &done);
+ r = read_data(part.data, chunk_size, boundary, done);
if (!boundary) {
return -EINVAL;
}
- parts[part.name] = part;
+ /* Just reading the data but not storing any results of that. */
} while (!done);
return 0;
}
-int RGWPostObj_ObjStore_S3::get_data(bufferlist& bl)
+int RGWPostObj_ObjStore_S3::get_data(ceph::bufferlist& bl, bool& again)
{
bool boundary;
bool done;
- uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
- int r = read_data(bl, chunk_size, &boundary, &done);
- if (r < 0)
+ const uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+ int r = read_data(bl, chunk_size, boundary, done);
+ if (r < 0) {
return r;
+ }
if (boundary) {
- data_pending = false;
-
- if (!done) { /* reached end of data, let's drain the rest of the params */
+ if (!done) {
+ /* Reached end of data, let's drain the rest of the params */
r = complete_get_params();
- if (r < 0)
- return r;
+ if (r < 0) {
+ return r;
+ }
}
}
+ again = !boundary;
return bl.length();
}
@@ -2085,7 +1799,7 @@ void RGWPostObj_ObjStore_S3::send_response()
if (op_ret == 0 && parts.count("success_action_redirect")) {
string redirect;
- part_str("success_action_redirect", &redirect);
+ part_str(parts, "success_action_redirect", &redirect);
string tenant;
string bucket;
@@ -2135,7 +1849,7 @@ void RGWPostObj_ObjStore_S3::send_response()
string status_string;
uint32_t status_int;
- part_str("success_action_status", &status_string);
+ part_str(parts, "success_action_status", &status_string);
int r = stringtoul(status_string, &status_int);
if (r < 0) {
@@ -2938,7 +2652,7 @@ void RGWDeleteMultiObj_ObjStore_S3::send_partial_response(rgw_obj_key& key,
}
s->formatter->close_section();
} else if (op_ret < 0) {
- struct rgw_http_errors r;
+ struct rgw_http_error r;
int err_no;
s->formatter->open_object_section("Error");
@@ -3073,6 +2787,8 @@ RGWOp *RGWHandler_REST_Bucket_S3::op_get()
return new RGWListBucketMultiparts_ObjStore_S3;
} else if(is_lc_op()) {
return new RGWGetLC_ObjStore_S3;
+ } else if(is_policy_op()) {
+ return new RGWGetBucketPolicy;
}
return get_obj_op(true);
}
@@ -3107,6 +2823,8 @@ RGWOp *RGWHandler_REST_Bucket_S3::op_put()
return new RGWSetRequestPayment_ObjStore_S3;
} else if(is_lc_op()) {
return new RGWPutLC_ObjStore_S3;
+ } else if(is_policy_op()) {
+ return new RGWPutBucketPolicy;
}
return new RGWCreateBucket_ObjStore_S3;
}
@@ -3117,6 +2835,8 @@ RGWOp *RGWHandler_REST_Bucket_S3::op_delete()
return new RGWDeleteCORS_ObjStore_S3;
} else if(is_lc_op()) {
return new RGWDeleteLC_ObjStore_S3;
+ } else if(is_policy_op()) {
+ return new RGWDeleteBucketPolicy;
}
if (s->info.args.sub_resource_exists("website")) {
@@ -3936,7 +3656,8 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s, bool force_b
case RGW_OP_DELETE_MULTI_OBJ:
case RGW_OP_ADMIN_SET_METADATA:
case RGW_OP_SET_BUCKET_WEBSITE:
- break;
+ case RGW_OP_PUT_BUCKET_POLICY:
+ break;
default:
dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED" << dendl;
return -ERR_NOT_IMPLEMENTED;
@@ -4253,11 +3974,11 @@ int RGWHandler_REST_S3Website::serve_errordoc(int http_ret, const string& errord
int RGWHandler_REST_S3Website::error_handler(int err_no,
string* error_content) {
int new_err_no = -1;
- const struct rgw_http_errors* r;
+ rgw_http_errors::const_iterator r = rgw_http_s3_errors.find(err_no > 0 ? err_no : -err_no);
int http_error_code = -1;
- r = search_err(err_no > 0 ? err_no : -err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
- if (r) {
- http_error_code = r->http_ret;
+
+ if (r != rgw_http_s3_errors.end()) {
+ http_error_code = r->second.first;
}
ldout(s->cct, 10) << "RGWHandler_REST_S3Website::error_handler err_no=" << err_no << " http_ret=" << http_error_code << dendl;
diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h
index 291b04b9281..8dcf242b283 100644
--- a/src/rgw/rgw_rest_s3.h
+++ b/src/rgw/rgw_rest_s3.h
@@ -27,7 +27,12 @@
#define RGW_AUTH_GRACE_MINS 15
-void rgw_get_errno_s3(struct rgw_http_errors *e, int err_no);
+struct rgw_http_error {
+ int http_ret;
+ const char *s3_code;
+};
+
+void rgw_get_errno_s3(struct rgw_http_error *e, int err_no);
class RGWGetObj_ObjStore_S3 : public RGWGetObj_ObjStore
{
@@ -195,46 +200,22 @@ public:
bufferlist* manifest_bl) override;
};
-struct post_part_field {
- string val;
- map<string, string> params;
-};
-
-struct post_form_part {
- string name;
- string content_type;
- map<string, struct post_part_field, ltstr_nocase> fields;
- bufferlist data;
-};
-
class RGWPostObj_ObjStore_S3 : public RGWPostObj_ObjStore {
- string boundary;
- string filename;
- bufferlist in_data;
- map<string, post_form_part, const ltstr_nocase> parts;
+ parts_collection_t parts;
+ std::string filename;
+ std::string content_type;
RGWPolicyEnv env;
RGWPolicy post_policy;
- string err_msg;
map<string, string> crypt_http_responses;
const rgw::auth::StrategyRegistry* auth_registry_ptr = nullptr;
- int read_with_boundary(bufferlist& bl, uint64_t max, bool check_eol,
- bool *reached_boundary,
- bool *done);
-
- int read_line(bufferlist& bl, uint64_t max,
- bool *reached_boundary, bool *done);
-
- int read_data(bufferlist& bl, uint64_t max, bool *reached_boundary, bool *done);
-
- int read_form_part_header(struct post_form_part *part,
- bool *done);
- bool part_str(const string& name, string *val);
- bool part_bl(const string& name, bufferlist *pbl);
-
int get_policy();
void rebuild_key(string& key);
+
+ std::string get_current_filename() const override;
+ std::string get_current_content_type() const override;
+
public:
RGWPostObj_ObjStore_S3() {}
~RGWPostObj_ObjStore_S3() override {}
@@ -248,7 +229,7 @@ public:
int complete_get_params();
void send_response() override;
- int get_data(bufferlist& bl) override;
+ int get_data(ceph::bufferlist& bl, bool& again) override;
int get_encrypt_filter(std::unique_ptr<RGWPutObjDataProcessor>* filter,
RGWPutObjDataProcessor* cb) override;
};
@@ -530,6 +511,9 @@ protected:
bool is_request_payment_op() {
return s->info.args.exists("requestPayment");
}
+ bool is_policy_op() { /* True when the request args contain "policy". */
+ return s->info.args.exists("policy");
+ }
RGWOp *get_obj_op(bool get_data);
RGWOp *op_get() override;
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
index 3d4a4d5bda8..6cca4f2efc2 100644
--- a/src/rgw/rgw_rest_swift.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -306,6 +306,8 @@ void RGWListBucket_ObjStore_SWIFT::send_response()
s->formatter->dump_string("name", key.name);
s->formatter->dump_string("hash", iter->meta.etag);
s->formatter->dump_int("bytes", iter->meta.accounted_size);
+ if (!iter->meta.user_data.empty())
+ s->formatter->dump_string("user_custom_data", iter->meta.user_data);
string single_content_type = iter->meta.content_type;
if (iter->meta.content_type.size()) {
// content type might hold multiple values, just dump the last one
@@ -502,6 +504,7 @@ static int get_swift_container_settings(req_state * const s,
RGWRados * const store,
RGWAccessControlPolicy * const policy,
bool * const has_policy,
+ uint32_t * rw_mask,
RGWCORSConfiguration * const cors_config,
bool * const has_cors)
{
@@ -524,7 +527,8 @@ static int get_swift_container_settings(req_state * const s,
s->user->user_id,
s->user->display_name,
read_list,
- write_list);
+ write_list,
+ *rw_mask);
if (r < 0) {
return r;
}
@@ -622,8 +626,10 @@ static int get_swift_versioning_settings(
int RGWCreateBucket_ObjStore_SWIFT::get_params()
{
bool has_policy;
+ uint32_t policy_rw_mask = 0;
- int r = get_swift_container_settings(s, store, &policy, &has_policy, &cors_config, &has_cors);
+ int r = get_swift_container_settings(s, store, &policy, &has_policy,
+ &policy_rw_mask, &cors_config, &has_cors);
if (r < 0) {
return r;
}
@@ -758,6 +764,13 @@ int RGWPutObj_ObjStore_SWIFT::get_params()
return r;
}
+ if (!s->cct->_conf->rgw_swift_custom_header.empty()) {
+ string custom_header = s->cct->_conf->rgw_swift_custom_header;
+ if (s->info.env->exists(custom_header.c_str())) {
+ user_data = s->info.env->get(custom_header.c_str());
+ }
+ }
+
dlo_manifest = s->info.env->get("HTTP_X_OBJECT_MANIFEST");
bool exists;
string multipart_manifest = s->info.args.get("multipart-manifest", &exists);
@@ -895,7 +908,7 @@ int RGWPutMetadataBucket_ObjStore_SWIFT::get_params()
}
int r = get_swift_container_settings(s, store, &policy, &has_policy,
- &cors_config, &has_cors);
+ &policy_rw_mask, &cors_config, &has_cors);
if (r < 0) {
return r;
}
@@ -943,7 +956,7 @@ void RGWPutMetadataObject_ObjStore_SWIFT::send_response()
op_ret = STATUS_ACCEPTED;
}
set_req_state_err(s, op_ret);
- if (!s->err.is_err()) {
+ if (!s->is_err()) {
dump_content_length(s, 0);
}
dump_errno(s);
@@ -969,7 +982,6 @@ static void bulkdelete_respond(const unsigned num_deleted,
reason = fail_desc.err;
}
}
-
rgw_err err;
set_req_state_err(err, reason, prot_flags);
dump_errno(err, resp_status);
@@ -1283,7 +1295,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl,
: op_ret);
dump_errno(s);
- if (s->err.is_err()) {
+ if (s->is_err()) {
end_header(s, NULL);
return 0;
}
@@ -1293,7 +1305,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl,
dump_range(s, ofs, end, s->obj_size);
}
- if (s->err.is_err()) {
+ if (s->is_err()) {
end_header(s, NULL);
return 0;
}
@@ -1713,6 +1725,278 @@ bool RGWInfo_ObjStore_SWIFT::is_expired(const std::string& expires, CephContext*
return false;
}
+
+void RGWFormPost::init(RGWRados* const store,
+ req_state* const s,
+ RGWHandler* const dialect_handler)
+{ /* Move the object name out of the request state into `prefix` (it is
+   * prepended to uploaded file names by get_current_filename()), reset the
+   * request's object key, then run the base-class initialisation. */
+ prefix = std::move(s->object.name);
+ s->object = rgw_obj_key();
+
+ return RGWPostObj_ObjStore::init(store, s, dialect_handler);
+}
+
+std::size_t RGWFormPost::get_max_file_size() /*const*/
+{
+  /* Returns the value of the "max_file_size" control part. Yields 0 when
+   * the value cannot be parsed as a decimal integer or is negative. */
+  const std::string max_str = get_part_str(ctrl_parts, "max_file_size", "0");
+
+  std::string err;
+  const long long parsed = strict_strtoll(max_str.c_str(), 10, &err);
+
+  if (! err.empty()) {
+    ldout(s->cct, 5) << "failed to parse FormPost's max_file_size: " << err
+                     << dendl;
+    return 0;
+  }
+
+  /* Casting a negative number to an unsigned type would wrap around and
+   * turn e.g. "-1" into a practically unlimited size; treat it as invalid. */
+  if (parsed < 0) {
+    ldout(s->cct, 5) << "FormPost's max_file_size is negative: " << parsed
+                     << dendl;
+    return 0;
+  }
+
+  return static_cast<std::size_t>(parsed);
+}
+
+bool RGWFormPost::is_non_expired()
+{
+  /* True only when the "expires" control part parses as a decimal
+   * timestamp that is strictly later than the current time. */
+  const std::string expires = get_part_str(ctrl_parts, "expires", "0");
+
+  std::string err;
+  const long long expires_timestamp =
+    strict_strtoll(expires.c_str(), 10, &err);
+
+  if (! err.empty()) {
+    ldout(s->cct, 5) << "failed to parse FormPost's expires: " << err << dendl;
+    return false;
+  }
+
+  /* Compare as signed values: casting a negative timestamp to an unsigned
+   * type would wrap around and make the form look valid practically
+   * forever. */
+  const utime_t now = ceph_clock_now();
+  if (expires_timestamp <= static_cast<long long>(now.sec())) {
+    ldout(s->cct, 5) << "FormPost form expired: "
+                     << expires_timestamp << " <= " << now.sec() << dendl;
+    return false;
+  }
+
+  return true;
+}
+
+bool RGWFormPost::is_integral()
+{ /* Checks the form's "signature" control part against a signature
+   * computed with each non-empty temp URL key of the user. Returns true on
+   * the first match and false when no key produces a matching signature. */
+ const std::string form_signature = get_part_str(ctrl_parts, "signature");
+
+ for (const auto& kv : s->user->temp_url_keys) {
+ const int temp_url_key_num = kv.first;
+ const string& temp_url_key = kv.second;
+
+ if (temp_url_key.empty()) {
+ continue;
+ }
+
+ SignatureHelper sig_helper; /* signs: request URI, redirect, max_file_size, max_file_count, expires */
+ sig_helper.calc(temp_url_key,
+ s->info.request_uri,
+ get_part_str(ctrl_parts, "redirect"),
+ get_part_str(ctrl_parts, "max_file_size", "0"),
+ get_part_str(ctrl_parts, "max_file_count", "0"),
+ get_part_str(ctrl_parts, "expires", "0"));
+
+ const auto local_sig = sig_helper.get_signature();
+
+ ldout(s->cct, 20) << "FormPost signature [" << temp_url_key_num << "]"
+ << " (calculated): " << local_sig << dendl;
+
+ if (sig_helper.is_equal_to(form_signature)) {
+ return true;
+ } else { /* keep going: another key may still match */
+ ldout(s->cct, 5) << "FormPost's signature mismatch: "
+ << local_sig << " != " << form_signature << dendl;
+ }
+ }
+
+ return false;
+}
+
+int RGWFormPost::get_params()
+{ /* Reads form parts until the first one that carries a "filename"
+   * parameter, storing every earlier part in ctrl_parts, and then checks
+   * the form's expiration and signature. Returns 0 on success, the error
+   * of a failed read, -EINVAL for a malformed body or a form without any
+   * file, and -EPERM for an expired form or a signature mismatch. */
+ /* The parent class extracts boundary info from the Content-Type. */
+ int ret = RGWPostObj_ObjStore::get_params();
+ if (ret < 0) {
+ return ret;
+ }
+
+ policy.create_default(s->user->user_id, s->user->display_name);
+
+ /* Let's start parsing the HTTP body by parsing each form part step-
+ * by-step till encountering the first part with file data. */
+ do {
+ struct post_form_part part;
+ ret = read_form_part_header(&part, stream_done);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
+ ldout(s->cct, 20) << "read part header -- part.name="
+ << part.name << dendl;
+
+ for (const auto& pair : part.fields) {
+ ldout(s->cct, 20) << "field.name=" << pair.first << dendl;
+ ldout(s->cct, 20) << "field.val=" << pair.second.val << dendl;
+ ldout(s->cct, 20) << "field.params:" << dendl;
+
+ for (const auto& param_pair : pair.second.params) {
+ ldout(s->cct, 20) << " " << param_pair.first
+ << " -> " << param_pair.second << dendl;
+ }
+ }
+ }
+
+ if (stream_done) {
+ /* Unexpected here. */
+ err_msg = "Malformed request";
+ return -EINVAL;
+ }
+
+ const auto field_iter = part.fields.find("Content-Disposition"); /* a "filename" parameter marks a file part */
+ if (std::end(part.fields) != field_iter &&
+ std::end(field_iter->second.params) != field_iter->second.params.find("filename")) {
+ /* First data part ahead. */
+ current_data_part = std::move(part);
+
+ /* Stop the iteration. We can assume that all control parts have been
+ * already parsed. The rest of HTTP body should contain data parts
+ * only. They will be picked up by ::get_data(). */
+ break;
+ } else {
+ /* Control part ahead. Receive, parse and store for later usage. */
+ bool boundary;
+ ret = read_data(part.data, s->cct->_conf->rgw_max_chunk_size,
+ boundary, stream_done);
+ if (ret < 0) {
+ return ret;
+ } else if (! boundary) {
+ err_msg = "Couldn't find boundary";
+ return -EINVAL;
+ }
+
+ ctrl_parts[part.name] = std::move(part);
+ }
+ } while (! stream_done);
+
+ min_len = 0; /* the upload size limits come from the form itself */
+ max_len = get_max_file_size();
+
+ if (! current_data_part) {
+ err_msg = "FormPost: no files to process";
+ return -EINVAL;
+ }
+
+ if (! is_non_expired()) {
+ err_msg = "FormPost: Form Expired";
+ return -EPERM;
+ }
+
+ if (! is_integral()) {
+ err_msg = "FormPost: Invalid Signature";
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+std::string RGWFormPost::get_current_filename() const
+{ /* Returns `prefix` followed by the "filename" parameter of the current
+   * data part's Content-Disposition field, or just `prefix` when that
+   * field or parameter is absent. */
+ try {
+ const auto& field = current_data_part->fields.at("Content-Disposition"); /* at() throws when the field is missing */
+ const auto iter = field.params.find("filename");
+
+ if (std::end(field.params) != iter) {
+ return prefix + iter->second;
+ }
+ } catch (std::out_of_range&) {
+ /* NOP */;
+ }
+
+ return prefix;
+}
+
+std::string RGWFormPost::get_current_content_type() const
+{ /* Returns the value of the current data part's Content-Type field, or
+   * an empty string when the part has no such field. */
+ try {
+ const auto& field = current_data_part->fields.at("Content-Type"); /* at() throws when the field is missing */
+ return field.val;
+ } catch (std::out_of_range&) {
+ /* NOP */;
+ }
+
+ return std::string();
+}
+
+bool RGWFormPost::is_next_file_to_upload()
+{ /* While the stream is not done, reads the next part header. If it has a
+   * Content-Disposition field with a "filename" parameter, the part
+   * becomes current_data_part and true is returned. In every other case,
+   * including a failed header read, the result is false. */
+ if (! stream_done) {
+ /* We have at least one additional part in the body. */
+ struct post_form_part part;
+ int r = read_form_part_header(&part, stream_done);
+ if (r < 0) {
+ return false;
+ }
+
+ const auto field_iter = part.fields.find("Content-Disposition");
+ if (std::end(part.fields) != field_iter) {
+ const auto& params = field_iter->second.params;
+
+ if (std::end(params) != params.find("filename")) {
+ current_data_part = std::move(part);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+int RGWFormPost::get_data(ceph::bufferlist& bl, bool& again)
+{ /* Reads the next chunk into `bl` through read_data(), passing
+   * rgw_max_chunk_size as the size argument. Returns the negative error
+   * of read_data() or, on success, bl.length(). `again` is set to true
+   * as long as the part boundary has not been reached. */
+ bool boundary;
+
+ int r = read_data(bl, s->cct->_conf->rgw_max_chunk_size,
+ boundary, stream_done);
+ if (r < 0) {
+ return r;
+ }
+
+ /* Tell RGWPostObj::execute() that it has some data to put. */
+ again = !boundary;
+
+ return bl.length();
+}
+
+void RGWFormPost::send_response()
+{ /* Sends the response headers. When the form carries a non-empty
+   * "redirect" control part, op_ret is replaced with STATUS_REDIRECT
+   * (whatever its previous value) and a redirect is dumped. */
+ std::string redirect = get_part_str(ctrl_parts, "redirect");
+ if (! redirect.empty()) {
+ op_ret = STATUS_REDIRECT;
+ }
+
+ set_req_state_err(s, op_ret);
+ s->err.s3_code = err_msg; /* err_msg is stored in the s3_code field of the request error */
+ dump_errno(s);
+ if (! redirect.empty()) {
+ dump_redirect(s, redirect);
+ }
+ end_header(s, this);
+}
+
+bool RGWFormPost::is_formpost_req(req_state* const s)
+{ /* True when the request's CONTENT_TYPE is "multipart/form-data"
+   * (compared case-insensitively) and has a "boundary" parameter. */
+ std::string content_type;
+ std::map<std::string, std::string> params;
+
+ parse_boundary_params(s->info.env->get("CONTENT_TYPE", ""),
+ content_type, params);
+
+ return boost::algorithm::iequals(content_type, "multipart/form-data") &&
+ params.count("boundary") > 0;
+}
+
+
RGWOp *RGWHandler_REST_Service_SWIFT::op_get()
{
return new RGWListBuckets_ObjStore_SWIFT;
@@ -1788,9 +2072,8 @@ int RGWSwiftWebsiteHandler::error_handler(const int err_no,
const auto& ws_conf = s->bucket_info.website_conf;
if (can_be_website_req() && ! ws_conf.error_doc.empty()) {
- struct rgw_err err;
- set_req_state_err(err, err_no, s->prot_flags);
- return serve_errordoc(err.http_ret, ws_conf.error_doc);
+ set_req_state_err(s, err_no);
+ return serve_errordoc(s->err.http_ret, ws_conf.error_doc);
}
/* Let's go to the default, no-op handler. */
@@ -2103,6 +2386,9 @@ RGWOp *RGWHandler_REST_Bucket_SWIFT::op_put()
if (is_acl_op()) {
return new RGWPutACLs_ObjStore_SWIFT;
}
+ if(s->info.args.exists("extract-archive")) {
+ return new RGWBulkUploadOp_ObjStore_SWIFT;
+ }
return new RGWCreateBucket_ObjStore_SWIFT;
}
@@ -2113,7 +2399,11 @@ RGWOp *RGWHandler_REST_Bucket_SWIFT::op_delete()
RGWOp *RGWHandler_REST_Bucket_SWIFT::op_post()
{
- return new RGWPutMetadataBucket_ObjStore_SWIFT;
+ if (RGWFormPost::is_formpost_req(s)) {
+ return new RGWFormPost;
+ } else {
+ return new RGWPutMetadataBucket_ObjStore_SWIFT;
+ }
}
RGWOp *RGWHandler_REST_Bucket_SWIFT::op_options()
@@ -2148,6 +2438,9 @@ RGWOp *RGWHandler_REST_Obj_SWIFT::op_put()
if (is_acl_op()) {
return new RGWPutACLs_ObjStore_SWIFT;
}
+ if(s->info.args.exists("extract-archive")) {
+ return new RGWBulkUploadOp_ObjStore_SWIFT;
+ }
if (s->init_state.src_bucket.empty())
return new RGWPutObj_ObjStore_SWIFT;
else
@@ -2161,7 +2454,11 @@ RGWOp *RGWHandler_REST_Obj_SWIFT::op_delete()
RGWOp *RGWHandler_REST_Obj_SWIFT::op_post()
{
- return new RGWPutMetadataObject_ObjStore_SWIFT;
+ if (RGWFormPost::is_formpost_req(s)) {
+ return new RGWFormPost;
+ } else {
+ return new RGWPutMetadataObject_ObjStore_SWIFT;
+ }
}
RGWOp *RGWHandler_REST_Obj_SWIFT::op_copy()
diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h
index 8478b1bad7b..0b1293e286f 100644
--- a/src/rgw/rgw_rest_swift.h
+++ b/src/rgw/rgw_rest_swift.h
@@ -11,6 +11,9 @@
#include "rgw_op.h"
#include "rgw_rest.h"
#include "rgw_swift_auth.h"
+#include "rgw_http_errors.h"
+
+#include <boost/utility/string_ref.hpp>
class RGWGetObj_ObjStore_SWIFT : public RGWGetObj_ObjStore {
int custom_http_ret = 0;
@@ -238,6 +241,94 @@ public:
};
+class RGWFormPost : public RGWPostObj_ObjStore { /* Swift FormPost upload:
+   * a multipart/form-data POST whose control parts (signature, expires,
+   * max_file_size, ...) are validated before file parts are stored. */
+ std::string get_current_filename() const override;
+ std::string get_current_content_type() const override;
+ std::size_t get_max_file_size() /*const*/;
+ bool is_next_file_to_upload() override;
+ bool is_integral(); /* signature check */
+ bool is_non_expired(); /* "expires" check */
+
+ parts_collection_t ctrl_parts; /* parts without a "filename", collected by get_params() */
+ boost::optional<post_form_part> current_data_part; /* header of the file part being processed */
+ std::string prefix; /* object name taken from the request in init() */
+ bool stream_done = false; /* "done" flag handed to read_form_part_header()/read_data() */
+
+ class SignatureHelper;
+public:
+ RGWFormPost() = default;
+ ~RGWFormPost() = default;
+
+ void init(RGWRados* store,
+ req_state* s,
+ RGWHandler* dialect_handler) override;
+
+ int get_params() override;
+ int get_data(ceph::bufferlist& bl, bool& again) override;
+ void send_response() override;
+
+ static bool is_formpost_req(req_state* const s);
+};
+
+/* Computes and verifies the FormPost signature: a hex-encoded HMAC-SHA1
+ * over the request path and the redirect, max_file_size, max_file_count
+ * and expires values, joined with newline characters. */
+class RGWFormPost::SignatureHelper
+{
+private:
+  /* Two hex characters per digest byte plus the terminating NUL. */
+  static constexpr uint32_t output_size =
+    CEPH_CRYPTO_HMACSHA1_DIGESTSIZE * 2 + 1;
+
+  /* Both buffers are zero-initialised so that get_signature() and
+   * is_equal_to() never read indeterminate bytes when they are used
+   * before calc(). */
+  unsigned char dest[CEPH_CRYPTO_HMACSHA1_DIGESTSIZE] = {}; // 20
+  char dest_str[output_size] = {};
+
+public:
+  SignatureHelper() = default;
+
+  /* Calculates the signature with `key` and stores it in the helper.
+   * Returns a pointer to the internal hex string; it stays valid until
+   * the next calc() call or the destruction of the helper. */
+  const char* calc(const std::string& key,
+                   const boost::string_ref& path_info,
+                   const boost::string_ref& redirect,
+                   const boost::string_ref& max_file_size,
+                   const boost::string_ref& max_file_count,
+                   const boost::string_ref& expires) {
+    using ceph::crypto::HMACSHA1;
+    using UCHARPTR = const unsigned char*;
+
+    HMACSHA1 hmac((UCHARPTR) key.data(), key.size());
+
+    hmac.Update((UCHARPTR) path_info.data(), path_info.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    hmac.Update((UCHARPTR) redirect.data(), redirect.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    hmac.Update((UCHARPTR) max_file_size.data(), max_file_size.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    hmac.Update((UCHARPTR) max_file_count.data(), max_file_count.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    /* No separator after the last element. */
+    hmac.Update((UCHARPTR) expires.data(), expires.size());
+
+    hmac.Final(dest);
+
+    buf_to_hex((UCHARPTR) dest, sizeof(dest), dest_str);
+
+    return dest_str;
+  }
+
+  /* Returns the hex string produced by the last calc() call. */
+  const char* get_signature() const {
+    return dest_str;
+  }
+
+  /* Tells whether `rhs` is exactly the calculated hex signature. */
+  bool is_equal_to(const std::string& rhs) const {
+    /* Length of the hex digest without its terminating NUL. */
+    constexpr std::size_t sig_len = output_size - 1;
+    if (rhs.size() != sig_len) {
+      return false;
+    }
+
+    /* `rhs` comes from the client. Look at every byte regardless of where
+     * the first difference is, so the time taken does not reveal how long
+     * the matching prefix was. */
+    unsigned char diff = 0;
+    for (std::size_t i = 0; i < sig_len; ++i) {
+      diff |= static_cast<unsigned char>(rhs[i] ^ dest_str[i]);
+    }
+    return diff == 0;
+  }
+
+}; /* RGWFormPost::SignatureHelper */
+
+
class RGWSwiftWebsiteHandler {
RGWRados* const store;
req_state* const s;
diff --git a/src/rgw/rgw_role.cc b/src/rgw/rgw_role.cc
index 9ccc20e70e3..236fe6e6aa4 100644
--- a/src/rgw/rgw_role.cc
+++ b/src/rgw/rgw_role.cc
@@ -1,5 +1,6 @@
#include <errno.h>
#include <ctime>
+#include <regex>
#include "common/errno.h"
#include "common/Formatter.h"
@@ -38,7 +39,7 @@ int RGWRole::store_name(bool exclusive)
RGWNameToId nameToId;
nameToId.obj_id = id;
- string oid = get_names_oid_prefix() + name;
+ string oid = tenant + get_names_oid_prefix() + name;
bufferlist bl;
::encode(nameToId, bl);
@@ -48,7 +49,7 @@ int RGWRole::store_name(bool exclusive)
int RGWRole::store_path(bool exclusive)
{
- string oid = get_path_oid_prefix() + path + get_info_oid_prefix() + id;
+ string oid = tenant + get_path_oid_prefix() + path + get_info_oid_prefix() + id;
return rgw_put_system_obj(store, store->get_zone_params().roles_pool, oid,
NULL, 0, exclusive, NULL, real_time(), NULL);
@@ -58,8 +59,12 @@ int RGWRole::create(bool exclusive)
{
int ret;
+ if (! validate_input()) {
+ return -EINVAL;
+ }
+
/* check to see the name is not used */
- ret = read_id(name, id);
+ ret = read_id(name, tenant, id);
if (exclusive && ret == 0) {
ldout(cct, 0) << "ERROR: name " << name << " already in use for role id "
<< id << dendl;
@@ -78,7 +83,7 @@ int RGWRole::create(bool exclusive)
id = uuid_str;
//arn
- arn = role_arn_prefix + uid + ":role" + path + name;
+ arn = role_arn_prefix + tenant + ":role" + path + name;
// Creation time
real_clock::time_point t = real_clock::now();
@@ -128,7 +133,7 @@ int RGWRole::create(bool exclusive)
<< id << ": " << cpp_strerror(-info_ret) << dendl;
}
//Delete role name that was stored in previous call
- oid = get_names_oid_prefix() + name;
+ oid = tenant + get_names_oid_prefix() + name;
int name_ret = rgw_delete_system_obj(store, pool, oid, NULL);
if (name_ret < 0) {
ldout(cct, 0) << "ERROR: cleanup of role name from pool: " << pool.name << ": "
@@ -166,7 +171,7 @@ int RGWRole::delete_obj()
}
// Delete name
- oid = get_names_oid_prefix() + name;
+ oid = tenant + get_names_oid_prefix() + name;
ret = rgw_delete_system_obj(store, pool, oid, NULL);
if (ret < 0) {
ldout(cct, 0) << "ERROR: deleting role name from pool: " << pool.name << ": "
@@ -174,7 +179,7 @@ int RGWRole::delete_obj()
}
// Delete path
- oid = get_path_oid_prefix() + path + get_info_oid_prefix() + id;
+ oid = tenant + get_path_oid_prefix() + path + get_info_oid_prefix() + id;
ret = rgw_delete_system_obj(store, pool, oid, NULL);
if (ret < 0) {
ldout(cct, 0) << "ERROR: deleting role path from pool: " << pool.name << ": "
@@ -198,6 +203,16 @@ int RGWRole::get()
return 0;
}
+int RGWRole::get_by_id()
+{ /* Calls read_info() and returns its negative result on failure;
+   * every non-negative result is reported as 0. */
+ int ret = read_info();
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
int RGWRole::update()
{
auto& pool = store->get_zone_params().roles_pool;
@@ -272,10 +287,10 @@ void RGWRole::decode_json(JSONObj *obj)
JSONDecoder::decode_json("assume_role_policy_document", trust_policy, obj);
}
-int RGWRole::read_id(const string& role_name, string& role_id)
+int RGWRole::read_id(const string& role_name, const string& tenant, string& role_id)
{
auto& pool = store->get_zone_params().roles_pool;
- string oid = get_names_oid_prefix() + role_name;
+ string oid = tenant + get_names_oid_prefix() + role_name;
bufferlist bl;
RGWObjectCtx obj_ctx(store);
@@ -326,7 +341,7 @@ int RGWRole::read_info()
int RGWRole::read_name()
{
auto& pool = store->get_zone_params().roles_pool;
- string oid = get_names_oid_prefix() + name;
+ string oid = tenant + get_names_oid_prefix() + name;
bufferlist bl;
RGWObjectCtx obj_ctx(store);
@@ -350,21 +365,61 @@ int RGWRole::read_name()
return 0;
}
+bool RGWRole::validate_input()
+{ /* Validates the `name` and `path` members. Returns false, after
+   * logging the reason, as soon as one check fails and true when both
+   * values pass the length and character checks. */
+ if (name.length() > MAX_ROLE_NAME_LEN) {
+ ldout(cct, 0) << "ERROR: Invalid name length " << dendl;
+ return false;
+ }
+
+ if (path.length() > MAX_PATH_NAME_LEN) {
+ ldout(cct, 0) << "ERROR: Invalid path length " << dendl;
+ return false;
+ }
+
+ std::regex regex_name("[A-Za-z0-9:=,.@-]+"); /* non-empty; letters, digits and : = , . @ - only */
+ if (! std::regex_match(name, regex_name)) {
+ ldout(cct, 0) << "ERROR: Invalid chars in name " << dendl;
+ return false;
+ }
+
+ std::regex regex_path("(/[!-~]+/)|(/)"); /* "/" alone, or "/" + characters from '!' to '~' + "/" */
+ if (! std::regex_match(path,regex_path)) {
+ ldout(cct, 0) << "ERROR: Invalid chars in path " << dendl;
+ return false;
+ }
+
+ return true;
+}
+
+void RGWRole::extract_name_tenant(const std::string& str)
+{ /* Splits `str` at its first '$': the text before it becomes `tenant`
+   * and the text after it becomes `name`. Without a '$' in `str` both
+   * members are left untouched. */
+ size_t pos = str.find('$');
+ if (pos != std::string::npos) {
+ tenant = str.substr(0, pos);
+ name = str.substr(pos + 1);
+ }
+}
+
void RGWRole::update_trust_policy(string& trust_policy)
{
this->trust_policy = trust_policy;
}
-int RGWRole::get_roles_by_path_prefix(RGWRados *store, CephContext *cct, const string& path_prefix, vector<RGWRole>& roles)
+int RGWRole::get_roles_by_path_prefix(RGWRados *store,
+ CephContext *cct,
+ const string& path_prefix,
+ const string& tenant,
+ vector<RGWRole>& roles)
{
auto pool = store->get_zone_params().roles_pool;
string prefix;
// List all roles if path prefix is empty
if (! path_prefix.empty()) {
- prefix = role_path_oid_prefix + path_prefix;
+ prefix = tenant + role_path_oid_prefix + path_prefix;
} else {
- prefix = role_path_oid_prefix;
+ prefix = tenant + role_path_oid_prefix;
}
//Get the filtered objects
diff --git a/src/rgw/rgw_role.h b/src/rgw/rgw_role.h
index f8a60a4c568..bccb576ed2d 100644
--- a/src/rgw/rgw_role.h
+++ b/src/rgw/rgw_role.h
@@ -7,6 +7,8 @@ class RGWRole
static const string role_oid_prefix;
static const string role_path_oid_prefix;
static const string role_arn_prefix;
+ static constexpr int MAX_ROLE_NAME_LEN = 64;
+ static constexpr int MAX_PATH_NAME_LEN = 512;
CephContext *cct;
RGWRados *store;
@@ -17,15 +19,17 @@ class RGWRole
string creation_date;
string trust_policy;
map<string, string> perm_policy_map;
- string uid;
+ string tenant;
int store_info(bool exclusive);
int store_name(bool exclusive);
int store_path(bool exclusive);
- int read_id(const string& role_name, string& role_id);
+ int read_id(const string& role_name, const string& tenant, string& role_id);
int read_name();
int read_info();
void set_id(const string& id) { this->id = id; }
+ bool validate_input();
+ void extract_name_tenant(const std::string& str);
public:
RGWRole(CephContext *cct,
@@ -33,23 +37,35 @@ public:
string name,
string path,
string trust_policy,
- string uid)
+ string tenant)
: cct(cct),
store(store),
name(std::move(name)),
path(std::move(path)),
trust_policy(std::move(trust_policy)),
- uid(std::move(uid)) {
+ tenant(std::move(tenant)) {
if (this->path.empty())
this->path = "/";
+ extract_name_tenant(this->name);
}
RGWRole(CephContext *cct,
RGWRados *store,
- string name)
+ string name,
+ string tenant)
+ : cct(cct),
+ store(store),
+ name(std::move(name)),
+ tenant(std::move(tenant)) {
+ extract_name_tenant(this->name);
+ }
+
+ RGWRole(CephContext *cct,
+ RGWRados *store,
+ string id)
: cct(cct),
store(store),
- name(std::move(name)) {}
+ id(std::move(id)) {}
RGWRole(CephContext *cct,
RGWRados *store)
@@ -61,7 +77,7 @@ public:
~RGWRole() = default;
void encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 1, bl);
::encode(id, bl);
::encode(name, bl);
::encode(path, bl);
@@ -69,11 +85,12 @@ public:
::encode(creation_date, bl);
::encode(trust_policy, bl);
::encode(perm_policy_map, bl);
+ ::encode(tenant, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& bl) {
- DECODE_START(1, bl);
+ DECODE_START(2, bl);
::decode(id, bl);
::decode(name, bl);
::decode(path, bl);
@@ -81,6 +98,9 @@ public:
::decode(creation_date, bl);
::decode(trust_policy, bl);
::decode(perm_policy_map, bl);
+ if (struct_v >= 2) {
+ ::decode(tenant, bl);
+ }
DECODE_FINISH(bl);
}
@@ -88,10 +108,12 @@ public:
const string& get_name() const { return name; }
const string& get_path() const { return path; }
const string& get_create_date() const { return creation_date; }
+ const string& get_assume_role_policy() const { return trust_policy;}
int create(bool exclusive);
int delete_obj();
int get();
+ int get_by_id();
int update();
void update_trust_policy(string& trust_policy);
void set_perm_policy(const string& policy_name, const string& perm_policy);
@@ -104,7 +126,11 @@ public:
static const string& get_names_oid_prefix();
static const string& get_info_oid_prefix();
static const string& get_path_oid_prefix();
- static int get_roles_by_path_prefix(RGWRados *store, CephContext *cct, const string& path_prefix, vector<RGWRole>& roles);
+ static int get_roles_by_path_prefix(RGWRados *store,
+ CephContext *cct,
+ const string& path_prefix,
+ const string& tenant,
+ vector<RGWRole>& roles);
};
WRITE_CLASS_ENCODER(RGWRole)
#endif /* CEPH_RGW_ROLE_H */
diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc
index be1fa2fdd88..9d8d250132b 100644
--- a/src/rgw/rgw_sync.cc
+++ b/src/rgw/rgw_sync.cc
@@ -52,7 +52,7 @@ RGWCoroutine *RGWSyncErrorLogger::log_error_cr(const string& source_zone, const
::encode(info, bl);
store->time_log_prepare_entry(entry, real_clock::now(), section, name, bl);
- uint32_t shard_id = counter.inc() % num_shards;
+ uint32_t shard_id = ++counter % num_shards;
return new RGWRadosTimelogAddCR(store, oids[shard_id], entry);
@@ -83,8 +83,8 @@ void RGWSyncBackoff::backoff(RGWCoroutine *op)
}
int RGWBackoffControlCR::operate() {
- RGWCoroutine *finisher_cr;
reenter(this) {
+ // retry the operation until it succeeds
while (true) {
yield {
Mutex::Locker l(lock);
@@ -97,7 +97,10 @@ int RGWBackoffControlCR::operate() {
cr->put();
cr = NULL;
}
- if (retcode < 0 && retcode != -EBUSY && retcode != -EAGAIN) {
+ if (retcode >= 0) {
+ break;
+ }
+ if (retcode != -EBUSY && retcode != -EAGAIN) {
ldout(cct, 0) << "ERROR: RGWBackoffControlCR called coroutine returned " << retcode << dendl;
if (exit_on_error) {
return set_cr_error(retcode);
@@ -107,17 +110,15 @@ int RGWBackoffControlCR::operate() {
backoff.reset();
}
yield backoff.backoff(this);
- finisher_cr = alloc_finisher_cr();
- if (finisher_cr) {
- yield call(finisher_cr);
- if (retcode < 0) {
- ldout(cct, 0) << "ERROR: call to finisher_cr() failed: retcode=" << retcode << dendl;
- if (exit_on_error) {
- return set_cr_error(retcode);
- }
- }
- }
}
+
+ // run an optional finisher
+ yield call(alloc_finisher_cr());
+ if (retcode < 0) {
+ ldout(cct, 0) << "ERROR: call to finisher_cr() failed: retcode=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
}
return 0;
}
@@ -289,7 +290,7 @@ int RGWRemoteMetaLog::init()
void RGWRemoteMetaLog::finish()
{
- going_down.set(1);
+ going_down = true;
stop();
}
@@ -320,13 +321,14 @@ int RGWMetaSyncStatusManager::init()
RGWMetaSyncEnv& sync_env = master_log.get_sync_env();
- r = read_sync_status();
+ rgw_meta_sync_status sync_status;
+ r = read_sync_status(&sync_status);
if (r < 0 && r != -ENOENT) {
lderr(store->ctx()) << "ERROR: failed to read sync status, r=" << r << dendl;
return r;
}
- int num_shards = master_log.get_sync_status().sync_info.num_shards;
+ int num_shards = sync_status.sync_info.num_shards;
for (int i = 0; i < num_shards; i++) {
shard_objs[i] = rgw_raw_obj(store->get_zone_params().log_pool, sync_env.shard_obj_name(i));
@@ -691,36 +693,75 @@ public:
}
};
-class RGWReadSyncStatusCoroutine : public RGWSimpleRadosReadCR<rgw_meta_sync_info> {
- RGWMetaSyncEnv *sync_env;
+class RGWReadSyncStatusMarkersCR : public RGWShardCollectCR { // reads the sync marker object of each shard into a caller-owned map
+ static constexpr int MAX_CONCURRENT_SHARDS = 16; // handed to RGWShardCollectCR; presumably the cap on reads in flight - confirm
- rgw_meta_sync_status *sync_status;
+ RGWMetaSyncEnv *env;
+ const int num_shards; // shard ids 0..num_shards-1 are read
+ int shard_id{0}; // next shard spawn_next() will issue a read for
+ map<uint32_t, rgw_meta_sync_marker>& markers; // output map, keyed by shard id
-public:
- RGWReadSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env,
- rgw_meta_sync_status *_status) : RGWSimpleRadosReadCR(_sync_env->async_rados, _sync_env->store,
- rgw_raw_obj(_sync_env->store->get_zone_params().log_pool, _sync_env->status_oid()),
- &_status->sync_info),
- sync_env(_sync_env),
- sync_status(_status) {
+ public:
+ RGWReadSyncStatusMarkersCR(RGWMetaSyncEnv *env, int num_shards,
+ map<uint32_t, rgw_meta_sync_marker>& markers) // markers is held by reference and must outlive this coroutine
+ : RGWShardCollectCR(env->cct, MAX_CONCURRENT_SHARDS),
+ env(env), num_shards(num_shards), markers(markers)
+ {}
+ bool spawn_next() override; // spawns one shard read per call; false when no shards remain
+};
+bool RGWReadSyncStatusMarkersCR::spawn_next() // spawn the read for the next shard; returns false once every shard has been spawned
+{
+ if (shard_id >= num_shards) {
+ return false;
}
+ using CR = RGWSimpleRadosReadCR<rgw_meta_sync_marker>;
+ rgw_raw_obj obj{env->store->get_zone_params().log_pool,
+ env->shard_obj_name(shard_id)}; // this shard's marker object in the zone's log pool
+ spawn(new CR(env->async_rados, env->store, obj, &markers[shard_id]), false); // operator[] creates the map entry that the read fills in
+ shard_id++;
+ return true;
+}
+
+class RGWReadSyncStatusCoroutine : public RGWCoroutine { // reads the sync info object, then the per-shard markers, into *sync_status
+ RGWMetaSyncEnv *sync_env;
+ rgw_meta_sync_status *sync_status; // output: sync_info and sync_markers are filled in by operate()
- int handle_data(rgw_meta_sync_info& data) override;
+public:
+ RGWReadSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env,
+ rgw_meta_sync_status *_status) // both pointers are stored, not copied
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), sync_status(_status)
+ {}
+ int operate() override;
};
-int RGWReadSyncStatusCoroutine::handle_data(rgw_meta_sync_info& data)
+int RGWReadSyncStatusCoroutine::operate() // two steps: read sync_info, then read num_shards markers; any failure ends the coroutine with that error
{
- if (retcode == -ENOENT) {
- return 0;
- }
-
- RGWRados *store = sync_env->store;
- map<uint32_t, rgw_meta_sync_marker>& markers = sync_status->sync_markers;
- for (int i = 0; i < (int)data.num_shards; i++) {
- spawn(new RGWSimpleRadosReadCR<rgw_meta_sync_marker>(sync_env->async_rados, store,
- rgw_raw_obj(store->get_zone_params().log_pool, sync_env->shard_obj_name(i)),
- &markers[i]), true);
+ reenter(this) {
+ // read sync info
+ using ReadInfoCR = RGWSimpleRadosReadCR<rgw_meta_sync_info>;
+ yield {
+ bool empty_on_enoent = false; // fail on ENOENT
+ rgw_raw_obj obj{sync_env->store->get_zone_params().log_pool,
+ sync_env->status_oid()};
+ call(new ReadInfoCR(sync_env->async_rados, sync_env->store, obj,
+ &sync_status->sync_info, empty_on_enoent));
+ }
+ if (retcode < 0) { // result of the sync info read above
+ ldout(sync_env->cct, 4) << "failed to read sync status info with "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ // read shard markers
+ using ReadMarkersCR = RGWReadSyncStatusMarkersCR;
+ yield call(new ReadMarkersCR(sync_env, sync_status->sync_info.num_shards,
+ sync_status->sync_markers)); // shard count comes from the sync info that was just read
+ if (retcode < 0) {
+ ldout(sync_env->cct, 4) << "failed to read sync status markers with "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
}
return 0;
}
@@ -1103,7 +1144,7 @@ public:
sync_marker.timestamp = timestamp;
}
- ldout(sync_env->cct, 20) << __func__ << "(): updating marker marker_oid=" << marker_oid << " marker=" << new_marker << dendl;
+ ldout(sync_env->cct, 20) << __func__ << "(): updating marker marker_oid=" << marker_oid << " marker=" << new_marker << " realm_epoch=" << sync_marker.realm_epoch << dendl;
RGWRados *store = sync_env->store;
return new RGWSimpleRadosWriteCR<rgw_meta_sync_marker>(sync_env->async_rados,
store,
@@ -1244,6 +1285,7 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
const rgw_pool& pool;
const std::string& period; //< currently syncing period id
+ const epoch_t realm_epoch; //< realm_epoch of period
RGWMetadataLog* mdlog; //< log of syncing period
uint32_t shard_id;
rgw_meta_sync_marker& sync_marker;
@@ -1291,11 +1333,13 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
public:
RGWMetaSyncShardCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool,
- const std::string& period, RGWMetadataLog* mdlog,
- uint32_t _shard_id, rgw_meta_sync_marker& _marker,
+ const std::string& period, epoch_t realm_epoch,
+ RGWMetadataLog* mdlog, uint32_t _shard_id,
+ rgw_meta_sync_marker& _marker,
const std::string& period_marker, bool *_reset_backoff)
: RGWCoroutine(_sync_env->cct), sync_env(_sync_env), pool(_pool),
- period(period), mdlog(mdlog), shard_id(_shard_id), sync_marker(_marker),
+ period(period), realm_epoch(realm_epoch), mdlog(mdlog),
+ shard_id(_shard_id), sync_marker(_marker),
period_marker(period_marker), inc_lock("RGWMetaSyncShardCR::inc_lock"),
reset_backoff(_reset_backoff) {
*reset_backoff = false;
@@ -1478,7 +1522,8 @@ public:
temp_marker->state = rgw_meta_sync_marker::IncrementalSync;
temp_marker->marker = std::move(temp_marker->next_step_marker);
temp_marker->next_step_marker.clear();
- ldout(sync_env->cct, 0) << *this << ": saving marker pos=" << temp_marker->marker << dendl;
+ temp_marker->realm_epoch = realm_epoch;
+ ldout(sync_env->cct, 0) << *this << ": saving marker pos=" << temp_marker->marker << " realm_epoch=" << realm_epoch << dendl;
using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_meta_sync_marker>;
yield call(new WriteMarkerCR(sync_env->async_rados, sync_env->store,
@@ -1549,6 +1594,14 @@ public:
yield;
}
}
+ // if the period has advanced, we can't use the existing marker
+ if (sync_marker.realm_epoch < realm_epoch) {
+ ldout(sync_env->cct, 0) << "clearing marker=" << sync_marker.marker
+ << " from old realm_epoch=" << sync_marker.realm_epoch
+ << " (now " << realm_epoch << ')' << dendl;
+ sync_marker.realm_epoch = realm_epoch;
+ sync_marker.marker.clear();
+ }
mdlog_marker = sync_marker.marker;
set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env,
sync_env->shard_obj_name(shard_id),
@@ -1570,7 +1623,8 @@ public:
}
#define INCREMENTAL_MAX_ENTRIES 100
ldout(sync_env->cct, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl;
- if (!period_marker.empty() && period_marker <= marker) {
+ if (!period_marker.empty() && period_marker <= mdlog_marker) {
+ ldout(cct, 10) << "mdlog_marker past period_marker=" << period_marker << dendl;
done_with_period = true;
break;
}
@@ -1602,10 +1656,16 @@ public:
*reset_backoff = false; // back off and try again later
return retcode;
}
- for (log_iter = log_entries.begin(); log_iter != log_entries.end(); ++log_iter) {
- if (!period_marker.empty() && period_marker < log_iter->id) {
+ for (log_iter = log_entries.begin(); log_iter != log_entries.end() && !done_with_period; ++log_iter) {
+ if (!period_marker.empty() && period_marker <= log_iter->id) {
done_with_period = true;
- break;
+ if (period_marker < log_iter->id) {
+ ldout(cct, 10) << "found key=" << log_iter->id
+ << " past period_marker=" << period_marker << dendl;
+ break;
+ }
+ ldout(cct, 10) << "found key at period_marker=" << period_marker << dendl;
+ // sync this entry, then return control to RGWMetaSyncCR
}
if (!mdlog_entry.convert_from(*log_iter)) {
ldout(sync_env->cct, 0) << __func__ << ":" << __LINE__ << ": ERROR: failed to convert mdlog entry, shard_id=" << shard_id << " log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp << " ... skipping entry" << dendl;
@@ -1631,6 +1691,7 @@ public:
ldout(sync_env->cct, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " max_marker=" << max_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl;
if (done_with_period) {
// return control to RGWMetaSyncCR and advance to the next period
+ ldout(sync_env->cct, 10) << *this << ": done with period" << dendl;
break;
}
if (mdlog_marker == max_marker && can_adjust_marker) {
@@ -1655,6 +1716,8 @@ public:
if (!can_adjust_marker) {
return -EAGAIN;
}
+
+ return set_cr_done();
}
/* TODO */
return 0;
@@ -1667,6 +1730,7 @@ class RGWMetaSyncShardControlCR : public RGWBackoffControlCR
const rgw_pool& pool;
const std::string& period;
+ epoch_t realm_epoch;
RGWMetadataLog* mdlog;
uint32_t shard_id;
rgw_meta_sync_marker sync_marker;
@@ -1675,16 +1739,18 @@ class RGWMetaSyncShardControlCR : public RGWBackoffControlCR
static constexpr bool exit_on_error = false; // retry on all errors
public:
RGWMetaSyncShardControlCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool,
- const std::string& period, RGWMetadataLog* mdlog,
- uint32_t _shard_id, const rgw_meta_sync_marker& _marker,
+ const std::string& period, epoch_t realm_epoch,
+ RGWMetadataLog* mdlog, uint32_t _shard_id,
+ const rgw_meta_sync_marker& _marker,
std::string&& period_marker)
: RGWBackoffControlCR(_sync_env->cct, exit_on_error), sync_env(_sync_env),
- pool(_pool), period(period), mdlog(mdlog), shard_id(_shard_id),
- sync_marker(_marker), period_marker(std::move(period_marker)) {}
+ pool(_pool), period(period), realm_epoch(realm_epoch), mdlog(mdlog),
+ shard_id(_shard_id), sync_marker(_marker),
+ period_marker(std::move(period_marker)) {}
RGWCoroutine *alloc_cr() override {
- return new RGWMetaSyncShardCR(sync_env, pool, period, mdlog, shard_id,
- sync_marker, period_marker, backoff_ptr());
+ return new RGWMetaSyncShardCR(sync_env, pool, period, realm_epoch, mdlog,
+ shard_id, sync_marker, period_marker, backoff_ptr());
}
RGWCoroutine *alloc_finisher_cr() override {
@@ -1743,6 +1809,7 @@ public:
yield {
// get the mdlog for the current period (may be empty)
auto& period_id = sync_status.sync_info.period;
+ auto realm_epoch = sync_status.sync_info.realm_epoch;
auto mdlog = sync_env->store->meta_mgr->get_log(period_id);
// prevent wakeup() from accessing shard_crs while we're spawning them
@@ -1765,9 +1832,10 @@ public:
}
}
- auto cr = new RGWMetaSyncShardControlCR(sync_env, pool, period_id,
- mdlog, shard_id, marker,
- std::move(period_marker));
+ using ShardCR = RGWMetaSyncShardControlCR;
+ auto cr = new ShardCR(sync_env, pool, period_id, realm_epoch,
+ mdlog, shard_id, marker,
+ std::move(period_marker));
auto stack = spawn(cr, false);
shard_crs[shard_id] = RefPair{cr, stack};
}
@@ -1821,13 +1889,24 @@ void RGWRemoteMetaLog::init_sync_env(RGWMetaSyncEnv *env) {
env->error_logger = error_logger;
}
-int RGWRemoteMetaLog::read_sync_status()
+int RGWRemoteMetaLog::read_sync_status(rgw_meta_sync_status *sync_status) // fills *sync_status; returns 0 or a negative error code
{
if (store->is_meta_master()) {
return 0;
}
-
- return run(new RGWReadSyncStatusCoroutine(&sync_env, &sync_status));
+ // cannot run concurrently with run_sync(), so run in a separate manager
+ RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+ RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr());
+ int ret = http_manager.set_threaded();
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "failed in http_manager.set_threaded() ret=" << ret << dendl;
+ return ret;
+ }
+ RGWMetaSyncEnv sync_env_local = sync_env; // private copy, so the shared sync_env keeps its own http manager
+ sync_env_local.http_manager = &http_manager;
+ ret = crs.run(new RGWReadSyncStatusCoroutine(&sync_env_local, sync_status));
+ http_manager.stop(); // stopped before returning, whether or not the read succeeded
+ return ret;
}
int RGWRemoteMetaLog::init_sync_status()
@@ -1836,30 +1915,29 @@ int RGWRemoteMetaLog::init_sync_status()
return 0;
}
- auto& sync_info = sync_status.sync_info;
- if (!sync_info.num_shards) {
- rgw_mdlog_info mdlog_info;
- int r = read_log_info(&mdlog_info);
- if (r < 0) {
- lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl;
- return r;
- }
- sync_info.num_shards = mdlog_info.num_shards;
- auto cursor = store->period_history->get_current();
- if (cursor) {
- sync_info.period = cursor.get_period().get_id();
- sync_info.realm_epoch = cursor.get_epoch();
- }
+ rgw_mdlog_info mdlog_info;
+ int r = read_log_info(&mdlog_info);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl;
+ return r;
+ }
+
+ rgw_meta_sync_info sync_info;
+ sync_info.num_shards = mdlog_info.num_shards;
+ auto cursor = store->period_history->get_current();
+ if (cursor) {
+ sync_info.period = cursor.get_period().get_id();
+ sync_info.realm_epoch = cursor.get_epoch();
}
return run(new RGWInitSyncStatusCoroutine(&sync_env, sync_info));
}
-int RGWRemoteMetaLog::store_sync_info()
+int RGWRemoteMetaLog::store_sync_info(const rgw_meta_sync_info& sync_info) // writes sync_info to the status object in the zone's log pool
{
return run(new RGWSimpleRadosWriteCR<rgw_meta_sync_info>(async_rados, store,
rgw_raw_obj(store->get_zone_params().log_pool, sync_env.status_oid()),
- sync_status.sync_info));
+ sync_info)); // the caller's value is written; no member status is consulted
}
// return a cursor to the period at our sync position
@@ -1914,7 +1992,7 @@ int RGWRemoteMetaLog::run_sync()
// get shard count and oldest log period from master
rgw_mdlog_info mdlog_info;
for (;;) {
- if (going_down.read()) {
+ if (going_down) {
ldout(store->ctx(), 1) << __func__ << "(): going down" << dendl;
return 0;
}
@@ -1933,8 +2011,9 @@ int RGWRemoteMetaLog::run_sync()
break;
}
+ rgw_meta_sync_status sync_status;
do {
- if (going_down.read()) {
+ if (going_down) {
ldout(store->ctx(), 1) << __func__ << "(): going down" << dendl;
return 0;
}
@@ -2008,7 +2087,7 @@ int RGWRemoteMetaLog::run_sync()
}
sync_status.sync_info.state = rgw_meta_sync_info::StateSync;
- r = store_sync_info();
+ r = store_sync_info(sync_status.sync_info);
if (r < 0) {
ldout(store->ctx(), 0) << "ERROR: failed to update sync status" << dendl;
return r;
@@ -2033,7 +2112,7 @@ int RGWRemoteMetaLog::run_sync()
ldout(store->ctx(), 0) << "ERROR: bad sync state!" << dendl;
return -EIO;
}
- } while (!going_down.read());
+ } while (!going_down);
return 0;
}
@@ -2234,3 +2313,690 @@ int RGWCloneMetaLogCoroutine::state_store_mdlog_entries_complete()
}
+// TODO: move into rgw_sync_trim.cc
+#undef dout_prefix
+#define dout_prefix (*_dout << "meta trim: ")
+
+/// remove the object behind every shard of the given mdlog, one
+/// RGWRadosRemoveCR per shard
+class PurgeLogShardsCR : public RGWShardCollectCR {
+  RGWRados *const store;
+  const RGWMetadataLog* mdlog;
+  const int num_shards;
+  rgw_raw_obj obj;     //< reused for each shard; only its oid changes
+  int next_shard{0};   //< shard whose removal is spawned next
+
+  static constexpr int concurrency_limit = 16;
+
+ public:
+  PurgeLogShardsCR(RGWRados *store, const RGWMetadataLog* mdlog,
+                   const rgw_pool& pool, int num_shards)
+    : RGWShardCollectCR(store->ctx(), concurrency_limit),
+      store(store), mdlog(mdlog), num_shards(num_shards), obj(pool, "")
+  {}
+
+  /// spawn the removal of the next shard; false once all have been spawned
+  bool spawn_next() override {
+    if (next_shard != num_shards) {
+      mdlog->get_shard_oid(next_shard, obj.oid);
+      ++next_shard;
+      spawn(new RGWRadosRemoveCR(store, obj), false);
+      return true;
+    }
+    return false;
+  }
+};
+
+using Cursor = RGWPeriodHistory::Cursor;
+
+/// purge mdlogs from the oldest up to (but not including) the given realm_epoch
+class PurgePeriodLogsCR : public RGWCoroutine {
+  RGWRados *const store;
+  RGWMetadataManager *const metadata;
+  RGWObjVersionTracker objv; //< passed to both the history read and the history trim
+  Cursor cursor; //< oldest log period; advanced as each period is purged
+  epoch_t realm_epoch; //< purging stops before this epoch
+  epoch_t *last_trim_epoch; //< update last trim on success
+
+ public:
+  PurgePeriodLogsCR(RGWRados *store, epoch_t realm_epoch, epoch_t *last_trim)
+    : RGWCoroutine(store->ctx()), store(store), metadata(store->meta_mgr),
+      realm_epoch(realm_epoch), last_trim_epoch(last_trim)
+  {}
+
+  // declared override, like the other coroutines in this file, so that a
+  // signature mismatch with RGWCoroutine::operate() is a compile error
+  int operate() override;
+};
+
+int PurgePeriodLogsCR::operate() // purge each period older than realm_epoch, oldest first, updating the mdlog history after each one
+{
+ reenter(this) {
+ // read our current oldest log period
+ yield call(metadata->read_oldest_log_period_cr(&cursor, &objv));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ assert(cursor); // a successful read is expected to leave a valid cursor
+ ldout(cct, 20) << "oldest log realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+
+ // trim -up to- the given realm_epoch
+ while (cursor.get_epoch() < realm_epoch) {
+ ldout(cct, 4) << "purging log shards for realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+ yield {
+ const auto mdlog = metadata->get_log(cursor.get_period().get_id());
+ const auto& pool = store->get_zone_params().log_pool;
+ auto num_shards = cct->_conf->rgw_md_log_max_shards; // shard count comes from local config, not from the period
+ call(new PurgeLogShardsCR(store, mdlog, pool, num_shards));
+ }
+ if (retcode < 0) {
+ ldout(cct, 1) << "failed to remove log shards: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ ldout(cct, 10) << "removed log shards for realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+
+ // update our mdlog history
+ yield call(metadata->trim_log_period_cr(cursor, &objv));
+ if (retcode == -ENOENT) {
+ // must have raced to update mdlog history. return success and allow the
+ // winner to continue purging
+ ldout(cct, 10) << "already removed log shards for realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+ return set_cr_done();
+ } else if (retcode < 0) {
+ ldout(cct, 1) << "failed to remove log shards for realm_epoch="
+ << cursor.get_epoch() << " period=" << cursor.get_period().get_id()
+ << " with: " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+
+ if (*last_trim_epoch < cursor.get_epoch()) { // only ever move the caller's last-trim epoch forward
+ *last_trim_epoch = cursor.get_epoch();
+ }
+
+ assert(cursor.has_next()); // get_current() should always come after
+ cursor.next();
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+namespace {
+
+using connection_map = std::map<std::string, std::unique_ptr<RGWRESTConn>>;
+
+/// build one RGWRESTConn per zone found in the given zonegroups, keyed by the
+/// zone's map key. when the same key appears more than once, the first
+/// connection inserted is kept
+template <typename Zonegroups>
+connection_map make_peer_connections(RGWRados *store,
+                                     const Zonegroups& zonegroups)
+{
+  connection_map peers;
+  for (auto& zonegroup : zonegroups) {
+    for (auto& zone : zonegroup.second.zones) {
+      const auto& zone_id = zone.first;
+      peers.emplace(zone_id, std::unique_ptr<RGWRESTConn>{
+          new RGWRESTConn(store->ctx(), store, zone_id, zone.second.endpoints)});
+    }
+  }
+  return peers;
+}
+
+/// return the marker that it's safe to trim up to: next_step_marker while the
+/// shard is in FullSync, and marker in any other state
+const std::string& get_stable_marker(const rgw_meta_sync_marker& m)
+{
+  if (m.state == m.FullSync) {
+    return m.next_step_marker;
+  }
+  return m.marker;
+}
+
+/// comparison operator for take_min_status(): orders two shard markers by a
+/// string comparison of their stable markers
+bool operator<(const rgw_meta_sync_marker& lhs, const rgw_meta_sync_marker& rhs)
+{
+  const std::string& left = get_stable_marker(lhs);
+  const std::string& right = get_stable_marker(rhs);
+  return left < right;
+}
+
+/// populate the status with the minimum stable marker of each shard for any
+/// peer whose realm_epoch matches the minimum realm_epoch in the input.
+///
+/// peers in [first, last) may be moved from. returns -EINVAL for an empty
+/// range or for a peer whose shard count differs from rgw_md_log_max_shards,
+/// and 0 otherwise
+template <typename Iter>
+int take_min_status(CephContext *cct, Iter first, Iter last,
+                    rgw_meta_sync_status *status)
+{
+  if (first == last) {
+    return -EINVAL;
+  }
+  const size_t num_shards = cct->_conf->rgw_md_log_max_shards;
+
+  // start above any real epoch so the first peer always replaces the status
+  status->sync_info.realm_epoch = std::numeric_limits<epoch_t>::max();
+  for (auto peer = first; peer != last; ++peer) {
+    // validate peer's shard count
+    const auto peer_shards = peer->sync_markers.size();
+    if (peer_shards != num_shards) {
+      ldout(cct, 1) << "take_min_status got peer status with "
+          << peer_shards << " shards, expected "
+          << num_shards << dendl;
+      return -EINVAL;
+    }
+
+    const auto peer_epoch = peer->sync_info.realm_epoch;
+    const auto min_epoch = status->sync_info.realm_epoch;
+    if (peer_epoch < min_epoch) {
+      // earlier epoch, take its entire status
+      *status = std::move(*peer);
+      continue;
+    }
+    if (peer_epoch > min_epoch) {
+      // later epoch, nothing to take from this peer
+      continue;
+    }
+
+    // same epoch, take any earlier markers. the two maps are walked in
+    // parallel, pairing entries by position
+    auto ours = status->sync_markers.begin();
+    for (auto theirs = peer->sync_markers.begin();
+         theirs != peer->sync_markers.end(); ++theirs, ++ours) {
+      if (theirs->second < ours->second) {
+        ours->second = std::move(theirs->second);
+      }
+    }
+  }
+  return 0;
+}
+
+struct TrimEnv { // state common to MasterTrimEnv and PeerTrimEnv
+ RGWRados *const store;
+ RGWHTTPManager *const http;
+ int num_shards;
+ const std::string& zone; //< a reference, not a copy. NOTE(review): relies on get_id() returning a reference that outlives this object - confirm
+ Cursor current; //< cursor to current period
+ epoch_t last_trim_epoch{0}; //< epoch of last mdlog that was purged
+
+ TrimEnv(RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : store(store), http(http), num_shards(num_shards),
+ zone(store->get_zone_params().get_id()),
+ current(store->period_history->get_current()) // captured once, at construction
+ {}
+};
+
+struct MasterTrimEnv : public TrimEnv { // trim state consumed by MetaMasterTrimCR and its collect coroutines
+ connection_map connections; //< peer connections
+ std::vector<rgw_meta_sync_status> peer_status; //< sync status for each peer
+ /// last trim marker for each shard, only applies to current period's mdlog
+ std::vector<std::string> last_trim_markers;
+
+ MasterTrimEnv(RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : TrimEnv(store, http, num_shards),
+ last_trim_markers(num_shards) // one empty marker per shard
+ {
+ auto& period = current.get_period();
+ connections = make_peer_connections(store, period.get_map().zonegroups);
+ connections.erase(zone); // drop the connection keyed by our own zone id so only peers remain
+ peer_status.resize(connections.size()); // one status slot per remaining connection
+ }
+};
+
+struct PeerTrimEnv : public TrimEnv { // TrimEnv plus a per-shard record of the last trimmed timestamp
+ /// last trim timestamp for each shard, only applies to current period's mdlog
+ std::vector<ceph::real_time> last_trim_timestamps;
+
+ PeerTrimEnv(RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : TrimEnv(store, http, num_shards),
+ last_trim_timestamps(num_shards)
+ {}
+
+ void set_num_shards(int num_shards) { // keeps the timestamp vector sized to the new shard count
+ this->num_shards = num_shards;
+ last_trim_timestamps.resize(num_shards);
+ }
+};
+
+} // anonymous namespace
+
+
+/// spawn a trim cr for each shard that needs it, while limiting the number
+/// of concurrent shards
+class MetaMasterTrimShardCollectCR : public RGWShardCollectCR {
+ private:
+ static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+ MasterTrimEnv& env;
+ RGWMetadataLog *mdlog; // log whose shard oids are looked up for trimming
+ int shard_id{0}; // next shard spawn_next() will examine
+ std::string oid; // scratch buffer for the current shard's oid
+ const rgw_meta_sync_status& sync_status; // held by reference; supplies the marker each shard is trimmed to
+
+ public:
+ MetaMasterTrimShardCollectCR(MasterTrimEnv& env, RGWMetadataLog *mdlog,
+ const rgw_meta_sync_status& sync_status)
+ : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
+ env(env), mdlog(mdlog), sync_status(sync_status)
+ {}
+
+ bool spawn_next() override;
+};
+
+/// advance to the next shard that has a marker in sync_status and whose
+/// stable marker is past the last trim, and spawn a trim for it. returns
+/// false once no shards remain
+bool MetaMasterTrimShardCollectCR::spawn_next()
+{
+  for (; shard_id < env.num_shards; ++shard_id) {
+    const auto found = sync_status.sync_markers.find(shard_id);
+    if (found == sync_status.sync_markers.end()) {
+      continue; // no marker recorded for this shard
+    }
+    auto& stable = get_stable_marker(found->second);
+    auto& last_trim = env.last_trim_markers[shard_id];
+
+    if (stable <= last_trim) {
+      // already trimmed
+      ldout(cct, 20) << "skipping log shard " << shard_id
+          << " at marker=" << stable
+          << " last_trim=" << last_trim
+          << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl;
+      continue;
+    }
+
+    mdlog->get_shard_oid(shard_id, oid);
+
+    ldout(cct, 10) << "trimming log shard " << shard_id
+        << " at marker=" << stable
+        << " last_trim=" << last_trim
+        << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl;
+    spawn(new RGWSyncLogTrimCR(env.store, oid, stable, &last_trim), false);
+    // returning skips the loop increment, so step past this shard here
+    ++shard_id;
+    return true;
+  }
+  return false;
+}
+
+/// issue one REST request per peer connection to read that peer's metadata
+/// sync status; each reply is stored in the matching slot of env.peer_status
+class MetaMasterStatusCollectCR : public RGWShardCollectCR {
+  static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+  MasterTrimEnv& env;
+  connection_map::iterator conn_iter;  //< next peer to query
+  std::vector<rgw_meta_sync_status>::iterator status_iter;  //< where its reply goes
+ public:
+  MetaMasterStatusCollectCR(MasterTrimEnv& env)
+    : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
+      env(env), conn_iter(env.connections.begin()),
+      status_iter(env.peer_status.begin())
+  {}
+
+  /// spawn the request for the next peer; false once every peer was queried
+  bool spawn_next() override {
+    if (conn_iter == env.connections.end()) {
+      return false;
+    }
+    static rgw_http_param_pair params[] = {
+      { "type", "metadata" },
+      { "status", nullptr },
+      { nullptr, nullptr }
+    };
+
+    ldout(cct, 20) << "query sync status from " << conn_iter->first << dendl;
+    using StatusCR = RGWReadRESTResourceCR<rgw_meta_sync_status>;
+    auto peer = conn_iter->second.get();
+    auto result = &*status_iter;
+    spawn(new StatusCR(cct, peer, env.http, "/admin/log/", params, result),
+          false);
+    // the two iterators advance together so peer N always writes slot N
+    ++conn_iter;
+    ++status_iter;
+    return true;
+  }
+};
+
+/// coroutine that queries every peer's sync status and purges or trims the
+/// local mdlogs according to the minimum status reported
+class MetaMasterTrimCR : public RGWCoroutine {
+  MasterTrimEnv& env;
+  rgw_meta_sync_status min_status; //< minimum sync status of all peers
+  int ret{0}; //< result code storage used by operate()
+
+ public:
+  MetaMasterTrimCR(MasterTrimEnv& env)
+    : RGWCoroutine(env.store->ctx()), env(env)
+  {}
+
+  // declared override, like the other coroutines in this file, so that a
+  // signature mismatch with RGWCoroutine::operate() is a compile error
+  int operate() override;
+};
+
+/// query every peer's metadata sync status, compute the minimum status, then
+/// purge the mdlogs of older periods and/or trim the current period's mdlog.
+///
+/// completes with an error when the status query fails or when
+/// take_min_status() rejects the replies; errors from the purge/trim
+/// coroutines themselves are deliberately ignored
+int MetaMasterTrimCR::operate()
+{
+  reenter(this) {
+    // TODO: detect this and fail before we spawn the trim thread?
+    if (env.connections.empty()) {
+      ldout(cct, 4) << "no peers, exiting" << dendl;
+      return set_cr_done();
+    }
+
+    ldout(cct, 10) << "fetching sync status for zone " << env.zone << dendl;
+    // query mdlog sync status from peers
+    yield call(new MetaMasterStatusCollectCR(env));
+
+    // must get a successful reply from all peers to consider trimming. the
+    // result of the call() above arrives in retcode; the 'ret' member is still
+    // 0 at this point, so testing it here could never detect a failed query
+    if (retcode < 0) {
+      ldout(cct, 4) << "failed to fetch sync status from all peers" << dendl;
+      return set_cr_error(retcode);
+    }
+
+    // determine the minimum epoch and markers
+    ret = take_min_status(env.store->ctx(), env.peer_status.begin(),
+                          env.peer_status.end(), &min_status);
+    if (ret < 0) {
+      ldout(cct, 4) << "failed to calculate min sync status from peers" << dendl;
+      return set_cr_error(ret);
+    }
+    yield {
+      auto store = env.store;
+      auto epoch = min_status.sync_info.realm_epoch;
+      ldout(cct, 4) << "realm epoch min=" << epoch
+          << " current=" << env.current.get_epoch()<< dendl;
+      if (epoch > env.last_trim_epoch + 1) {
+        // delete any prior mdlog periods
+        spawn(new PurgePeriodLogsCR(store, epoch, &env.last_trim_epoch), true);
+      } else {
+        ldout(cct, 10) << "mdlogs already purged up to realm_epoch "
+            << env.last_trim_epoch << dendl;
+      }
+
+      // if realm_epoch == current, trim mdlog based on markers
+      if (epoch == env.current.get_epoch()) {
+        auto mdlog = store->meta_mgr->get_log(env.current.get_period().get_id());
+        spawn(new MetaMasterTrimShardCollectCR(env, mdlog, min_status), true);
+      }
+    }
+    // ignore any errors during purge/trim because we want to hold the lock open
+    return set_cr_done();
+  }
+  return 0;
+}
+
+
+/// read the first entry of the master's mdlog shard and trim to that position
+class MetaPeerTrimShardCR : public RGWCoroutine {
+ RGWMetaSyncEnv& env;
+ RGWMetadataLog *mdlog;
+ const std::string& period_id; // held by reference; the caller's string must outlive this coroutine
+ const int shard_id;
+ RGWMetadataLogInfo info; // shard info read from the master when its listing is empty
+ ceph::real_time stable; //< safe timestamp to trim, according to master
+ ceph::real_time *last_trim; //< last trimmed timestamp, updated on trim
+ rgw_mdlog_shard_data result; //< result from master's mdlog listing
+
+ public:
+ MetaPeerTrimShardCR(RGWMetaSyncEnv& env, RGWMetadataLog *mdlog,
+ const std::string& period_id, int shard_id,
+ ceph::real_time *last_trim)
+ : RGWCoroutine(env.store->ctx()), env(env), mdlog(mdlog),
+ period_id(period_id), shard_id(shard_id), last_trim(last_trim)
+ {}
+
+ int operate() override;
+};
+
+int MetaPeerTrimShardCR::operate() // trims one mdlog shard up to the master's first entry
+{
+ reenter(this) {
+ // query master's first mdlog entry for this shard; the reply is stored in 'result'
+ yield call(new RGWListRemoteMDLogShardCR(&env, period_id, shard_id,
+ "", 1, &result)); // "" and 1: presumably start marker and max entries -- confirm
+ if (retcode < 0) {
+ ldout(cct, 5) << "failed to read first entry from master's mdlog shard "
+ << shard_id << " for period " << period_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ if (result.entries.empty()) {
+ // if there are no mdlog entries, we don't have a timestamp to compare. we
+ // can't just trim everything, because there could be racing updates since
+ // this empty reply. query the mdlog shard info to read its max timestamp,
+ // then retry the listing to make sure it's still empty before trimming to
+ // that
+ ldout(cct, 10) << "empty master mdlog shard " << shard_id
+ << ", reading last timestamp from shard info" << dendl;
+ // read the mdlog shard info for the last timestamp
+ using ShardInfoCR = RGWReadRemoteMDLogShardInfoCR;
+ yield call(new ShardInfoCR(&env, period_id, shard_id, &info));
+ if (retcode < 0) {
+ ldout(cct, 5) << "failed to read info from master's mdlog shard "
+ << shard_id << " for period " << period_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ if (ceph::real_clock::is_zero(info.last_update)) {
+ return set_cr_done(); // nothing to trim
+ }
+ ldout(cct, 10) << "got mdlog shard info with last update="
+ << info.last_update << dendl;
+ // re-read the master's first mdlog entry to make sure it hasn't changed
+ yield call(new RGWListRemoteMDLogShardCR(&env, period_id, shard_id,
+ "", 1, &result));
+ if (retcode < 0) {
+ ldout(cct, 5) << "failed to read first entry from master's mdlog shard "
+ << shard_id << " for period " << period_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ // if the mdlog is still empty, trim to the shard info's last_update timestamp
+ if (result.entries.empty()) {
+ stable = info.last_update;
+ } else {
+ stable = result.entries.front().timestamp;
+
+ // can only trim -up to- master's first timestamp, so subtract a second.
+ // (this is why we use timestamps instead of markers for the peers)
+ stable -= std::chrono::seconds(1);
+ }
+ } else {
+ stable = result.entries.front().timestamp; // the first listing already returned an entry
+ stable -= std::chrono::seconds(1); // stay one second short of the master's first entry
+ }
+
+ if (stable <= *last_trim) { // nothing newer than what was trimmed before
+ ldout(cct, 10) << "skipping log shard " << shard_id
+ << " at timestamp=" << stable
+ << " last_trim=" << *last_trim << dendl;
+ return set_cr_done();
+ }
+
+ ldout(cct, 10) << "trimming log shard " << shard_id
+ << " at timestamp=" << stable
+ << " last_trim=" << *last_trim << dendl;
+ yield {
+ std::string oid;
+ mdlog->get_shard_oid(shard_id, oid);
+ call(new RGWRadosTimelogTrimCR(env.store, oid, real_time{}, stable, "", "")); // presumably trims entries up to 'stable' -- confirm argument order
+ }
+ if (retcode < 0 && retcode != -ENODATA) { // -ENODATA is not treated as a failure
+ ldout(cct, 1) << "failed to trim mdlog shard " << shard_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ *last_trim = stable; // recorded so the 'stable <= *last_trim' check can skip this shard next time
+ return set_cr_done();
+ }
+ return 0;
+}
+
+/// Collects the per-shard peer trim: spawn_next() starts one
+/// MetaPeerTrimShardCR for each shard of the current period's mdlog.
+class MetaPeerTrimShardCollectCR : public RGWShardCollectCR {
+  /// value handed to the RGWShardCollectCR base constructor
+  static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+  PeerTrimEnv& env;
+  RGWMetadataLog *mdlog;
+  const std::string& period_id;
+  /// for RGWListRemoteMDLogShardCR
+  RGWMetaSyncEnv meta_env;
+  /// next shard to spawn a trim coroutine for
+  int shard_id{0};
+
+ public:
+  MetaPeerTrimShardCollectCR(PeerTrimEnv& env, RGWMetadataLog *mdlog)
+    : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
+      env(env),
+      mdlog(mdlog),
+      period_id(env.current.get_period().get_id())
+  {
+    // set up the sync env with the master connection, so the shard
+    // coroutines can query the master's mdlog
+    auto store = env.store;
+    meta_env.init(cct, store, store->rest_master_conn,
+                  store->get_async_rados(), env.http, nullptr);
+  }
+
+  bool spawn_next() override;
+};
+
+/// Spawn the trim coroutine for the next shard, if any.
+/// @return true if a coroutine was spawned, false once shard_id has reached
+///         env.num_shards
+bool MetaPeerTrimShardCollectCR::spawn_next()
+{
+  const bool shards_remaining = shard_id < env.num_shards;
+  if (shards_remaining) {
+    // each shard coroutine gets a pointer to its own last-trim timestamp
+    auto *shard_last_trim = &env.last_trim_timestamps[shard_id];
+    auto *trim_cr = new MetaPeerTrimShardCR(meta_env, mdlog, period_id,
+                                            shard_id, shard_last_trim);
+    spawn(trim_cr, false);
+    ++shard_id;
+  }
+  return shards_remaining;
+}
+
+/// Trim coroutine for zones that are not the metadata master: reads the
+/// master's mdlog info, purges mdlogs of earlier realm epochs, and, when the
+/// master's realm epoch matches the current one, trims every shard of the
+/// current period's mdlog.
+class MetaPeerTrimCR : public RGWCoroutine {
+  PeerTrimEnv& env; //< trim state, held by reference
+  rgw_mdlog_info mdlog_info; //< master's mdlog info
+
+ public:
+  // explicit: a PeerTrimEnv should never convert implicitly into a coroutine
+  explicit MetaPeerTrimCR(PeerTrimEnv& env)
+    : RGWCoroutine(env.store->ctx()), env(env) {}
+
+  // 'override' makes the compiler verify this matches RGWCoroutine::operate()
+  int operate() override;
+};
+
+int MetaPeerTrimCR::operate() // peer-side trim driven by the master's mdlog info
+{
+ reenter(this) {
+ ldout(cct, 10) << "fetching master mdlog info" << dendl;
+ yield {
+ // query mdlog_info from master; its realm_epoch and num_shards are used below
+ rgw_http_param_pair params[] = {
+ { "type", "metadata" },
+ { nullptr, nullptr } // presumably the end-of-list marker -- confirm
+ };
+
+ using LogInfoCR = RGWReadRESTResourceCR<rgw_mdlog_info>;
+ call(new LogInfoCR(cct, env.store->rest_master_conn, env.http,
+ "/admin/log/", params, &mdlog_info));
+ }
+ if (retcode < 0) {
+ ldout(cct, 4) << "failed to read mdlog info from master" << dendl;
+ return set_cr_error(retcode);
+ }
+ // use master's shard count instead of the one the env currently holds
+ env.set_num_shards(mdlog_info.num_shards);
+
+ if (mdlog_info.realm_epoch > env.last_trim_epoch + 1) {
+ // delete any prior mdlog periods
+ yield call(new PurgePeriodLogsCR(env.store, mdlog_info.realm_epoch,
+ &env.last_trim_epoch)); // presumably updates last_trim_epoch -- confirm; retcode is not checked
+ } else {
+ ldout(cct, 10) << "mdlogs already purged through realm_epoch "
+ << env.last_trim_epoch << dendl;
+ }
+
+ // if realm_epoch == current, trim each mdlog shard based on the master's first entries
+ if (mdlog_info.realm_epoch == env.current.get_epoch()) {
+ yield {
+ auto meta_mgr = env.store->meta_mgr;
+ auto mdlog = meta_mgr->get_log(env.current.get_period().get_id());
+ call(new MetaPeerTrimShardCollectCR(env, mdlog));
+ // errors from the purge and trim calls are deliberately not checked, because we want to hold the lock open
+ }
+ }
+ return set_cr_done(); // reached whether or not the purge/trim calls failed
+ }
+ return 0;
+}
+
+/// Base class of the polling trim coroutines: operate() loops forever,
+/// sleeping for `interval`, taking the "meta_trim" lock on `obj`, and then
+/// running the coroutine returned by alloc_cr().
+class MetaTrimPollCR : public RGWCoroutine {
+  RGWRados *const store;
+  const utime_t interval; //< polling interval
+  const rgw_raw_obj obj; //< object the trim lock is taken on
+  const std::string name{"meta_trim"}; //< lock name
+  const std::string cookie; //< lock cookie, generated once in the constructor
+
+ protected:
+  /// allocate the coroutine to run within the lease
+  virtual RGWCoroutine* alloc_cr() = 0;
+
+ public:
+  MetaTrimPollCR(RGWRados *store, utime_t interval)
+    : RGWCoroutine(store->ctx()), store(store), interval(interval),
+      obj(store->get_zone_params().log_pool, RGWMetadataLogHistory::oid),
+      cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct))
+  {}
+
+  // 'override' makes the compiler verify this matches RGWCoroutine::operate()
+  int operate() override;
+};
+
+int MetaTrimPollCR::operate() // endless poll loop: sleep, take the trim lock, run alloc_cr()
+{
+ reenter(this) {
+ for (;;) {
+ set_status("sleeping");
+ wait(interval); // NOTE(review): called without 'yield', unlike the call() sites below -- confirm the sleep takes effect as intended
+
+ // prevent others from trimming for our entire wait interval
+ set_status("acquiring trim lock");
+ yield call(new RGWSimpleRadosLockCR(store->get_async_rados(), store,
+ obj, name, cookie, interval.sec())); // interval.sec(): presumably the lock duration -- confirm
+ if (retcode < 0) {
+ ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl;
+ continue; // back to the top of the loop; the lock is retried on the next pass
+ }
+
+ set_status("trimming");
+ yield call(alloc_cr()); // the subclass decides which trim coroutine runs under the lock
+
+ if (retcode < 0) {
+ // on errors, unlock so other gateways can try
+ set_status("unlocking");
+ yield call(new RGWSimpleRadosUnlockCR(store->get_async_rados(), store,
+ obj, name, cookie));
+ } // on success no unlock is issued here; the lock is left as taken
+ }
+ }
+ return 0;
+}
+
+/// Polling trim coroutine for the metadata master: each call to alloc_cr()
+/// creates a MetaMasterTrimCR that works on the same MasterTrimEnv.
+class MetaMasterTrimPollCR : public MetaTrimPollCR {
+  /// trim state to share between calls
+  MasterTrimEnv env;
+
+  RGWCoroutine* alloc_cr() override { return new MetaMasterTrimCR(env); }
+
+ public:
+  MetaMasterTrimPollCR(RGWRados *store, RGWHTTPManager *http,
+                       int num_shards, utime_t interval)
+    : MetaTrimPollCR(store, interval), env(store, http, num_shards)
+  {}
+};
+
+/// Polling trim coroutine for non-master zones: each call to alloc_cr()
+/// creates a MetaPeerTrimCR that works on the same PeerTrimEnv.
+class MetaPeerTrimPollCR : public MetaTrimPollCR {
+  /// trim state to share between calls
+  PeerTrimEnv env;
+
+  RGWCoroutine* alloc_cr() override { return new MetaPeerTrimCR(env); }
+
+ public:
+  MetaPeerTrimPollCR(RGWRados *store, RGWHTTPManager *http,
+                     int num_shards, utime_t interval)
+    : MetaTrimPollCR(store, interval), env(store, http, num_shards)
+  {}
+};
+
+/// Create the polling mdlog trim coroutine that matches this zone's role.
+/// @return a new MetaMasterTrimPollCR if the store is the metadata master,
+///         otherwise a new MetaPeerTrimPollCR
+RGWCoroutine* create_meta_log_trim_cr(RGWRados *store, RGWHTTPManager *http,
+                                      int num_shards, utime_t interval)
+{
+  RGWCoroutine *trim_cr = nullptr;
+  if (store->is_meta_master()) {
+    trim_cr = new MetaMasterTrimPollCR(store, http, num_shards, interval);
+  } else {
+    trim_cr = new MetaPeerTrimPollCR(store, http, num_shards, interval);
+  }
+  return trim_cr;
+}
+
+
+/// Master trim coroutine that carries its own environment: MasterTrimEnv is a
+/// private base listed first, so it is constructed before the MetaMasterTrimCR
+/// base that receives a reference to it.
+struct MetaMasterAdminTrimCR : private MasterTrimEnv, public MetaMasterTrimCR {
+  MetaMasterAdminTrimCR(RGWRados *store, RGWHTTPManager *http, int num_shards)
+    : MasterTrimEnv(store, http, num_shards),
+      MetaMasterTrimCR(static_cast<MasterTrimEnv&>(*this))
+  {}
+};
+
+/// Peer trim coroutine that carries its own environment: PeerTrimEnv is a
+/// private base listed first, so it is constructed before the MetaPeerTrimCR
+/// base that receives a reference to it.
+struct MetaPeerAdminTrimCR : private PeerTrimEnv, public MetaPeerTrimCR {
+  MetaPeerAdminTrimCR(RGWRados *store, RGWHTTPManager *http, int num_shards)
+    : PeerTrimEnv(store, http, num_shards),
+      MetaPeerTrimCR(static_cast<PeerTrimEnv&>(*this))
+  {}
+};
+
+/// Create the self-contained (non-polling) mdlog trim coroutine that matches
+/// this zone's role.
+/// @return a new MetaMasterAdminTrimCR if the store is the metadata master,
+///         otherwise a new MetaPeerAdminTrimCR
+RGWCoroutine* create_admin_meta_log_trim_cr(RGWRados *store,
+                                            RGWHTTPManager *http,
+                                            int num_shards)
+{
+  const bool is_master = store->is_meta_master();
+  if (!is_master) {
+    return new MetaPeerAdminTrimCR(store, http, num_shards);
+  }
+  return new MetaMasterAdminTrimCR(store, http, num_shards);
+}
diff --git a/src/rgw/rgw_sync.h b/src/rgw/rgw_sync.h
index 878c5d8e18b..c651f7a9ad1 100644
--- a/src/rgw/rgw_sync.h
+++ b/src/rgw/rgw_sync.h
@@ -8,6 +8,8 @@
#include "include/stringify.h"
#include "common/RWLock.h"
+#include <atomic>
+
#define ERROR_LOGGER_SHARDS 32
#define RGW_SYNC_ERROR_LOG_SHARD_PREFIX "sync.error-log"
@@ -65,7 +67,7 @@ class RGWSyncErrorLogger {
vector<string> oids;
int num_shards;
- atomic_t counter;
+ std::atomic<int64_t> counter = { 0 };
public:
RGWSyncErrorLogger(RGWRados *_store, const string &oid_prefix, int _num_shards);
RGWCoroutine *log_error_cr(const string& source_zone, const string& section, const string& name, uint32_t error_code, const string& message);
@@ -191,12 +193,11 @@ class RGWRemoteMetaLog : public RGWCoroutinesManager {
RGWSyncBackoff backoff;
RGWMetaSyncEnv sync_env;
- rgw_meta_sync_status sync_status;
void init_sync_env(RGWMetaSyncEnv *env);
- int store_sync_info();
+ int store_sync_info(const rgw_meta_sync_info& sync_info);
- atomic_t going_down;
+ std::atomic<bool> going_down = { false };
public:
RGWRemoteMetaLog(RGWRados *_store, RGWAsyncRadosProcessor *async_rados,
@@ -214,7 +215,7 @@ public:
int read_log_info(rgw_mdlog_info *log_info);
int read_master_log_shards_info(const string& master_period, map<int, RGWMetadataLogInfo> *shards_info);
int read_master_log_shards_next(const string& period, map<int, string> shard_markers, map<int, rgw_mdlog_shard_data> *result);
- int read_sync_status();
+ int read_sync_status(rgw_meta_sync_status *sync_status);
int init_sync_status();
int run_sync();
@@ -223,7 +224,6 @@ public:
RGWMetaSyncEnv& get_sync_env() {
return sync_env;
}
- const rgw_meta_sync_status& get_sync_status() const { return sync_status; }
};
class RGWMetaSyncStatusManager {
@@ -257,13 +257,10 @@ public:
: store(_store), master_log(store, async_rados, this),
ts_to_shard_lock("ts_to_shard_lock") {}
int init();
- void finish();
- const rgw_meta_sync_status& get_sync_status() const {
- return master_log.get_sync_status();
+ int read_sync_status(rgw_meta_sync_status *sync_status) {
+ return master_log.read_sync_status(sync_status);
}
-
- int read_sync_status() { return master_log.read_sync_status(); }
int init_sync_status() { return master_log.init_sync_status(); }
int read_log_info(rgw_mdlog_info *log_info) {
return master_log.read_log_info(log_info);
@@ -455,5 +452,13 @@ public:
int operate() override;
};
+// factory function for the polling mdlog trim coroutine (master or peer variant)
+RGWCoroutine* create_meta_log_trim_cr(RGWRados *store, RGWHTTPManager *http,
+ int num_shards, utime_t interval);
+
+// factory function for mdlog trim via radosgw-admin
+RGWCoroutine* create_admin_meta_log_trim_cr(RGWRados *store,
+ RGWHTTPManager *http,
+ int num_shards);
#endif
diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc
index 79bffb2018f..b79fecb6a85 100644
--- a/src/rgw/rgw_tools.cc
+++ b/src/rgw/rgw_tools.cc
@@ -42,7 +42,6 @@ int rgw_get_system_obj(RGWRados *rgwstore, RGWObjectCtx& obj_ctx, const rgw_pool
RGWObjVersionTracker *objv_tracker, real_time *pmtime, map<string, bufferlist> *pattrs,
rgw_cache_entry_info *cache_info)
{
- struct rgw_err err;
bufferlist::iterator iter;
int request_len = READ_CHUNK_LEN;
rgw_raw_obj obj(pool, key);
@@ -58,7 +57,6 @@ int rgw_get_system_obj(RGWRados *rgwstore, RGWObjectCtx& obj_ctx, const rgw_pool
rop.stat_params.attrs = pattrs;
rop.stat_params.lastmod = pmtime;
- rop.stat_params.perr = &err;
int ret = rop.stat(objv_tracker);
if (ret < 0)
diff --git a/src/rgw/rgw_torrent.h b/src/rgw/rgw_torrent.h
index 6d2b28cd91a..ab8e89bbd79 100644
--- a/src/rgw/rgw_torrent.h
+++ b/src/rgw/rgw_torrent.h
@@ -11,7 +11,6 @@
#include "rgw_rados.h"
#include "rgw_common.h"
-using namespace std;
using ceph::crypto::SHA1;
struct req_state;
diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h
index 50cc1b62909..7a27d0e7007 100644
--- a/src/rgw/rgw_user.h
+++ b/src/rgw/rgw_user.h
@@ -19,8 +19,6 @@
#include "common/Formatter.h"
#include "rgw_formats.h"
-using namespace std;
-
#define RGW_USER_ANON_ID "anonymous"
#define SECRET_KEY_LEN 40
diff --git a/src/rgw/rgw_xml.h b/src/rgw/rgw_xml.h
index 2517c7b78e8..1f37a6a49e2 100644
--- a/src/rgw/rgw_xml.h
+++ b/src/rgw/rgw_xml.h
@@ -10,9 +10,6 @@
#include <include/types.h>
#include <common/Formatter.h>
-using namespace std;
-
-
class XMLObj;
class XMLObjIter {