summaryrefslogtreecommitdiffstats
path: root/src/libcephfs_proxy/proxy_mount.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libcephfs_proxy/proxy_mount.c')
-rw-r--r--src/libcephfs_proxy/proxy_mount.c1246
1 files changed, 1246 insertions, 0 deletions
diff --git a/src/libcephfs_proxy/proxy_mount.c b/src/libcephfs_proxy/proxy_mount.c
new file mode 100644
index 00000000000..abfef1232c2
--- /dev/null
+++ b/src/libcephfs_proxy/proxy_mount.c
@@ -0,0 +1,1246 @@
+
+#include "proxy_mount.h"
+#include "proxy_helpers.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/* Maximum number of symlinks to visit while resolving a single path. Once the
+ * limit is exceeded, proxy_path_iterator_resolve() fails with ELOOP. */
+#define PROXY_MAX_SYMLINKS 16
+
+struct _proxy_linked_str; /* Forward declaration: the struct links to itself through the typedef. */
+typedef struct _proxy_linked_str proxy_linked_str_t;
+
+/* This structure is used to handle symlinks found during the walk of a path.
+ *
+ * We'll start with an initial string representing a path. If one of the
+ * components is found to be a symlink, a new proxy_linked_str_t will be
+ * created with the content of the symlink. Then the new string will point
+ * to the old string, which may still contain some additional path components.
+ * The new string will be traversed resolving symlinks as they are found in the
+ * same way. Once it finished, the old string is recovered and traversal
+ * continues from the point it was left.
+ *
+ * The text is stored inline after the header, so a single allocation holds
+ * both. */
+struct _proxy_linked_str {
+ proxy_linked_str_t *next; /* String to resume once this one is consumed (NULL if none). */
+ char *remaining; /* Text still to be scanned inside `data`, or NULL when nothing is left. */
+ char data[]; /* Private copy of the text; separators are overwritten while scanning. */
+};
+
+/* This structure is used to traverse a path while resolving any symlink
+ * found. At the end, it will contain the realpath of the entry and its
+ * inode. */
+typedef struct _proxy_path_iterator {
+ struct ceph_statx stx; /* Attributes (inode number and mode) of the last component looked up. */
+ struct ceph_mount_info *cmount; /* Ceph mount used for all lookups. */
+ proxy_linked_str_t *lstr; /* Chain of path strings still to be walked. */
+ UserPerm *perms; /* Credentials passed to the libcephfs calls. */
+ struct Inode *root; /* Root inode of the mount; absolute symlinks restart here. */
+ struct Inode *base; /* Inode the next component is looked up from. */
+ char *realpath; /* Path built during the walk, or NULL when not requested. */
+ uint64_t root_ino; /* Inode number of `root`. */
+ uint64_t base_ino; /* Inode number of `base`. */
+ uint32_t realpath_size; /* Bytes allocated for `realpath`. */
+ uint32_t realpath_len; /* Characters currently stored in `realpath`. */
+ uint32_t symlinks; /* Symlinks expanded so far (bounded by PROXY_MAX_SYMLINKS). */
+ bool release; /* True when `base` holds a reference that must be dropped with ceph_ll_put(). */
+ bool follow; /* True when a symlink in the last component must be expanded. */
+} proxy_path_iterator_t;
+
+typedef struct _proxy_config { /* State shared by the steps that copy a configuration file. */
+ int32_t src; /* Descriptor of the configuration file being read. */
+ int32_t dst; /* Descriptor of the private copy being written. */
+ int32_t size; /* Size in bytes of `buffer`. */
+ int32_t total; /* Bytes copied so far. */
+ void *buffer; /* Transfer buffer. */
+} proxy_config_t;
+
+typedef struct _proxy_change { /* One recorded configuration action of an instance. */
+ list_t list; /* Link in the instance's `changes` list. */
+ uint32_t size; /* Number of bytes stored in `data`. */
+ char data[]; /* The recorded arguments, each stored with its terminator, back to back. */
+} proxy_change_t;
+
+typedef struct _proxy_iter { /* Cursor used to feed the recorded changes to the hash function. */
+ proxy_instance_t *instance; /* Instance whose changes are being hashed. */
+ list_t *item; /* Next change to deliver; equals &instance->changes at the end. */
+} proxy_iter_t;
+
+typedef struct _proxy_instance_pool { /* Table of mounted instances, searched when a mount is requested. */
+ pthread_mutex_t mutex; /* Taken around every access to `hash`. */
+ list_t hash[256]; /* Buckets indexed by the first element of an instance's hash. */
+} proxy_mount_pool_t;
+
+static proxy_mount_pool_t instance_pool = { /* Buckets start zero-filled; each one is set up on first use by proxy_instance_mount(). */
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
+};
+
+/* Ceph client instance sharing
+ *
+ * The main purpose of the libcephfs proxy is to avoid the multiple independent
+ * data caches that are created when libcephfs is used from different processes.
+ * However the cache is not created per process but per client instance, so each
+ * call to `ceph_create()` creates its own private data cache instance. Just
+ * forwarding the libcephfs API calls to a single proxy process is not enough to
+ * solve the problem.
+ *
+ * The proxy will try to reuse existing client instances to reduce the number of
+ * independent caches. However it's not always possible to map all proxy clients
+ * to a single libcephfs instance. When different settings are used, separate
+ * Ceph instances are required to avoid unwanted behaviors.
+ *
+ * Even though it's possible that some Ceph options may be compatible even if
+ * they have different values, the proxy won't try to handle these cases. It
+ * will consider the configuration as a black box, and only 100% equal
+ * configurations will share the Ceph client instance.
+ */
+
+/* Ceph configuration file management
+ *
+ * We won't try to parse Ceph configuration files. The proxy only wants to know
+ * if a configuration is equal or not. To do so, when a configuration file is
+ * passed to the proxy, it will create a private copy and compute an SHA256
+ * hash. If the hash doesn't match, the configuration is considered different,
+ * even if it's not a real difference (like additional empty lines or the order
+ * of the options).
+ *
+ * The private copy is necessary to enforce that the settings are not changed
+ * concurrently, which could make us believe that two configurations are equal
+ * when they are not.
+ *
+ * Besides a configuration file, the user can also make manual configuration
+ * changes by using `ceph_conf_set()`. These changes are also tracked and
+ * compared to be sure that the active configuration matches. Only if the
+ * configuration file is exactly equal and all the applied changes are the same,
+ * and in the same order, the Ceph client instance will be shared.
+ */
+
+/* Takes an additional reference on the inode identified by `inode`.
+ *
+ * libcephfs offers no call to increase the reference counter of an inode, so
+ * a full lookup by inode number is done and the reference it returns is kept.
+ *
+ * Returns the result of ceph_ll_lookup_inode(), which is negative on error. */
+int32_t proxy_inode_ref(proxy_mount_t *mount, uint64_t inode)
+{
+	inodeno_t ino;
+	struct Inode *tmp;
+	int32_t err;
+
+	ino.val = inode;
+
+	err = ceph_ll_lookup_inode(proxy_cmount(mount), ino, &tmp);
+	if (err < 0) {
+		/* Name the real function so that the message can be grepped. */
+		proxy_log(LOG_ERR, -err, "ceph_ll_lookup_inode() failed");
+	}
+
+	return err;
+}
+
+/* Allocates a linked string holding a private copy of `str` and places it in
+ * front of `next`. An empty `str` produces a string that is already empty
+ * (nothing left to scan).
+ *
+ * Returns the new string, or NULL if the allocation failed. */
+static proxy_linked_str_t *proxy_linked_str_create(const char *str,
+						   proxy_linked_str_t *next)
+{
+	proxy_linked_str_t *node;
+	uint32_t bytes;
+
+	/* Size of the text including its terminator. */
+	bytes = strlen(str) + 1;
+
+	node = proxy_malloc(sizeof(proxy_linked_str_t) + bytes);
+	if (node == NULL) {
+		return NULL;
+	}
+
+	node->next = next;
+	node->remaining = NULL;
+	if (bytes > 1) {
+		memcpy(node->data, str, bytes);
+		node->remaining = node->data;
+	}
+
+	return node;
+}
+
+/* Frees the head of a chain of linked strings and returns the string that was
+ * queued behind it (NULL when it was the last one). */
+static proxy_linked_str_t *proxy_linked_str_next(proxy_linked_str_t *lstr)
+{
+	proxy_linked_str_t *follower = lstr->next;
+
+	proxy_free(lstr);
+
+	return follower;
+}
+
+/* Releases every string of the chain that starts at `lstr`. A NULL chain is
+ * accepted. */
+static void proxy_linked_str_destroy(proxy_linked_str_t *lstr)
+{
+	proxy_linked_str_t *cur;
+
+	for (cur = lstr; cur != NULL; cur = proxy_linked_str_next(cur)) {
+	}
+}
+
+/* Tells whether the string has no text left to scan. */
+static bool proxy_linked_str_empty(proxy_linked_str_t *lstr)
+{
+	if (lstr->remaining != NULL) {
+		return false;
+	}
+
+	return true;
+}
+
+/* Cuts the next token out of the string.
+ *
+ * The token ends at the first `ch`, which is overwritten with a terminator;
+ * the text after it becomes the new remaining part. When `ch` is not present,
+ * the whole remaining text is returned and the string becomes empty.
+ *
+ * Must not be called on an empty string. */
+static char *proxy_linked_str_scan(proxy_linked_str_t *lstr, char ch)
+{
+	char *token, *sep;
+
+	token = lstr->remaining;
+
+	sep = strchr(token, ch);
+	if (sep != NULL) {
+		*sep = 0;
+		sep++;
+	}
+	lstr->remaining = sep;
+
+	return token;
+}
+
+/* Prepares `iter` to walk `path` inside `mount`.
+ *
+ * An absolute path starts at the root of the mount, any other path at its
+ * current directory. When `realpath` is true, a buffer is allocated to build
+ * the resulting path, seeded with "/" or with the mount's cwd path. `follow`
+ * selects whether a symlink in the last component is expanded.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the value of
+ * proxy_log() when `path` is NULL. */
+static int32_t proxy_path_iterator_init(proxy_path_iterator_t *iter,
+					proxy_mount_t *mount, const char *path,
+					UserPerm *perms, bool realpath,
+					bool follow)
+{
+	uint32_t size;
+	bool absolute;
+
+	if (path == NULL) {
+		return proxy_log(LOG_ERR, EINVAL, "NULL path received");
+	}
+
+	absolute = (*path == '/');
+	size = strlen(path) + 1;
+
+	memset(&iter->stx, 0, sizeof(iter->stx));
+	iter->cmount = proxy_cmount(mount);
+	iter->perms = perms;
+	iter->root = mount->root;
+	iter->root_ino = mount->root_ino;
+	iter->symlinks = 0;
+	iter->release = false;
+	iter->follow = follow;
+	iter->realpath = NULL;
+	iter->realpath_len = 0;
+	iter->realpath_size = 0;
+
+	if (absolute) {
+		iter->base = mount->root;
+		iter->base_ino = mount->root_ino;
+		/* The leading '/' is implied by the starting inode. */
+		path++;
+	} else {
+		iter->base = mount->cwd;
+		iter->base_ino = mount->cwd_ino;
+	}
+
+	if (realpath) {
+		if (!absolute) {
+			size += mount->cwd_path_len;
+		}
+		/* Round the initial size up to a multiple of 64 bytes. */
+		size = (size + 63) & ~63;
+		iter->realpath_size = size;
+
+		iter->realpath = proxy_malloc(size);
+		if (iter->realpath == NULL) {
+			return -ENOMEM;
+		}
+
+		if (absolute) {
+			iter->realpath[0] = '/';
+			iter->realpath[1] = 0;
+			iter->realpath_len = 1;
+		} else {
+			memcpy(iter->realpath, mount->cwd_path,
+			       mount->cwd_path_len + 1);
+			iter->realpath_len = mount->cwd_path_len;
+		}
+	}
+
+	iter->lstr = proxy_linked_str_create(path, NULL);
+	if (iter->lstr == NULL) {
+		proxy_free(iter->realpath);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Returns the next '/'-separated component of the path being walked, or NULL
+ * when all the strings have been consumed. Exhausted strings are freed on the
+ * way. */
+static char *proxy_path_iterator_next(proxy_path_iterator_t *iter)
+{
+	proxy_linked_str_t *cur;
+
+	cur = iter->lstr;
+	while (proxy_linked_str_empty(cur)) {
+		cur = proxy_linked_str_next(cur);
+		iter->lstr = cur;
+		if (cur == NULL) {
+			return NULL;
+		}
+	}
+
+	return proxy_linked_str_scan(cur, '/');
+}
+
+/* Tells whether the component just returned by proxy_path_iterator_next() was
+ * the last one, i.e. none of the pending strings has text left.
+ *
+ * Every string of the chain is inspected, not only the head: the head may be
+ * exhausted while an older string queued behind it still has components. */
+static bool proxy_path_iterator_is_last(proxy_path_iterator_t *iter)
+{
+	proxy_linked_str_t *lstr;
+
+	for (lstr = iter->lstr; lstr != NULL; lstr = lstr->next) {
+		if (!proxy_linked_str_empty(lstr)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Releases everything still owned by the iterator: the real path buffer, the
+ * pending strings and, if one is held, the reference on the base inode. */
+static void proxy_path_iterator_destroy(proxy_path_iterator_t *iter)
+{
+	proxy_free(iter->realpath);
+	proxy_linked_str_destroy(iter->lstr);
+
+	if (iter->release) {
+		ceph_ll_put(iter->cmount, iter->base);
+	}
+}
+
+/* Expands the symbolic link referenced by iter->base.
+ *
+ * The contents of the link are pushed in front of the strings still pending,
+ * so that they are walked first. When the link is absolute, the walk (and the
+ * real path, if one is being built) restarts at the root of the mount.
+ *
+ * Returns 0 on success or an error (< 0): -ENOMEM, a readlink failure, or
+ * ELOOP once more than PROXY_MAX_SYMLINKS links have been expanded. */
+static int32_t proxy_path_iterator_resolve(proxy_path_iterator_t *iter)
+{
+	static __thread char path[PATH_MAX];
+	proxy_linked_str_t *lstr;
+	char *ptr;
+	int32_t err;
+
+	if (++iter->symlinks > PROXY_MAX_SYMLINKS) {
+		return proxy_log(LOG_ERR, ELOOP, "Too many symbolic links");
+	}
+
+	/* ceph_ll_readlink() returns the number of bytes copied and, like
+	 * readlink(2), doesn't add a terminator. One byte is reserved and the
+	 * string is terminated explicitly; otherwise the remains of a longer
+	 * link previously stored in this per-thread buffer would be parsed as
+	 * part of the target. */
+	err = ceph_ll_readlink(iter->cmount, iter->base, path,
+			       sizeof(path) - 1, iter->perms);
+	if (err < 0) {
+		return proxy_log(LOG_ERR, -err, "ceph_ll_readlink() failed");
+	}
+	path[err] = 0;
+
+	ptr = path;
+	if (*ptr == '/') {
+		/* Absolute target: go back to the root of the mount. */
+		if (iter->release) {
+			ceph_ll_put(iter->cmount, iter->base);
+		}
+		iter->base = iter->root;
+		iter->base_ino = iter->root_ino;
+		iter->release = false;
+		if (iter->realpath != NULL) {
+			iter->realpath[1] = 0;
+			iter->realpath_len = 1;
+		}
+
+		ptr++;
+	}
+
+	lstr = proxy_linked_str_create(ptr, iter->lstr);
+	if (lstr == NULL) {
+		return -ENOMEM;
+	}
+	iter->lstr = lstr;
+
+	return 0;
+}
+
+/* Adds `name` as a new component at the end of the real path.
+ *
+ * The buffer is doubled as many times as needed to hold the separator, the
+ * name and the terminator. A '/' is inserted before the name unless the path
+ * is just one character long (the root).
+ *
+ * Returns 0 on success or the error returned by proxy_realloc(). */
+static int32_t proxy_path_iterator_append(proxy_path_iterator_t *iter,
+					  const char *name)
+{
+	uint32_t need, capacity;
+	int32_t err;
+
+	/* Bytes taken by the name, terminator included. */
+	need = strlen(name) + 1;
+
+	capacity = iter->realpath_size;
+	if (iter->realpath_len + need >= capacity) {
+		while (iter->realpath_len + need >= capacity) {
+			capacity <<= 1;
+		}
+
+		err = proxy_realloc((void **)&iter->realpath, capacity);
+		if (err < 0) {
+			return err;
+		}
+		iter->realpath_size = capacity;
+	}
+
+	if (iter->realpath_len > 1) {
+		iter->realpath[iter->realpath_len] = '/';
+		iter->realpath_len++;
+	}
+	memcpy(iter->realpath + iter->realpath_len, name, need);
+	iter->realpath_len += need - 1;
+
+	return 0;
+}
+
+/* Drops the last component of the real path (used when ".." is walked).
+ *
+ * The path is cut at its last '/', but never below the root: removing the only
+ * component of "/name" leaves "/", not an empty string, so that a later append
+ * still produces an absolute path. The result is always terminated, which
+ * matters when the removed component was the last step of the walk. */
+static void proxy_path_iterator_remove(proxy_path_iterator_t *iter)
+{
+	while ((iter->realpath_len > 0) &&
+	       (iter->realpath[--iter->realpath_len] != '/')) {
+	}
+
+	if (iter->realpath_len == 0) {
+		iter->realpath[0] = '/';
+		iter->realpath_len = 1;
+	}
+	iter->realpath[iter->realpath_len] = 0;
+}
+
+/* Looks up `name` inside `parent` with ceph_ll_lookup(), logging any failure.
+ *
+ * Returns the non-negative result of ceph_ll_lookup() on success, otherwise
+ * the value returned by proxy_log(). */
+static int32_t proxy_path_lookup(struct ceph_mount_info *cmount,
+				 struct Inode *parent, const char *name,
+				 struct Inode **inode, struct ceph_statx *stx,
+				 uint32_t want, uint32_t flags, UserPerm *perms)
+{
+	int32_t res;
+
+	res = ceph_ll_lookup(cmount, parent, name, inode, stx, want, flags,
+			     perms);
+
+	return (res < 0) ? proxy_log(LOG_ERR, -res, "ceph_ll_lookup() failed")
+			 : res;
+}
+
+/* Advances the walk by one component.
+ *
+ * If the previously visited entry was a symlink, it's expanded instead of
+ * looking `name` up. Otherwise `name` is looked up from the current base
+ * inode, the real path (if tracked) is updated, and the entry found becomes
+ * the new base. A symlink found in the last component is expanded right away
+ * when following links was requested.
+ *
+ * NOTE(review): when the first branch is taken, `name` has already been
+ * consumed from the iterator and is not looked up; and iter->stx is not
+ * refreshed by proxy_path_iterator_resolve(). Confirm that intermediate
+ * symlinks are handled as intended.
+ *
+ * Returns 0 on success or an error (< 0). */
+static int32_t proxy_path_iterator_lookup(proxy_path_iterator_t *iter,
+					  const char *name)
+{
+	struct Inode *found;
+	bool dotdot;
+	int32_t res;
+
+	if (S_ISLNK(iter->stx.stx_mode)) {
+		return proxy_path_iterator_resolve(iter);
+	}
+
+	res = proxy_path_lookup(iter->cmount, iter->base, name, &found,
+				&iter->stx, CEPH_STATX_INO | CEPH_STATX_MODE,
+				AT_SYMLINK_NOFOLLOW, iter->perms);
+	if (res < 0) {
+		return res;
+	}
+
+	if (iter->realpath != NULL) {
+		dotdot = (name[0] == '.') && (name[1] == '.') &&
+			 (name[2] == 0);
+		if (dotdot) {
+			proxy_path_iterator_remove(iter);
+		} else {
+			res = proxy_path_iterator_append(iter, name);
+			if (res < 0) {
+				/* The entry won't be used: drop its reference. */
+				ceph_ll_put(iter->cmount, found);
+				return res;
+			}
+		}
+	}
+
+	/* Move to the entry found, releasing the previous base if needed. */
+	if (iter->release) {
+		ceph_ll_put(iter->cmount, iter->base);
+	}
+	iter->base = found;
+	iter->base_ino = iter->stx.stx_ino;
+	iter->release = true;
+
+	if (!iter->follow || !S_ISLNK(iter->stx.stx_mode) ||
+	    !proxy_path_iterator_is_last(iter)) {
+		return 0;
+	}
+
+	return proxy_path_iterator_resolve(iter);
+}
+
+/* Implements a path walk ensuring that it's not possible to go higher than the
+ * root mount point used in ceph_mount(). This means that it handles absolute
+ * paths and ".." entries in a special way, including paths found in symbolic
+ * links.
+ *
+ * `path` is walked from the root of `mount` when it starts with '/', and from
+ * its current directory otherwise. A symlink in the last component is followed
+ * unless `flags` contains AT_SYMLINK_NOFOLLOW.
+ *
+ * On success `*inode` and `*stx` are filled by a final lookup of "." on the
+ * entry reached, using `want` and `flags`. When `realpath` is not NULL,
+ * `*realpath` receives the buffer with the path built during the walk and the
+ * caller becomes responsible for it.
+ *
+ * Returns a non-negative value on success or an error (< 0). */
+int32_t proxy_path_resolve(proxy_mount_t *mount, const char *path,
+ struct Inode **inode, struct ceph_statx *stx,
+ uint32_t want, uint32_t flags, UserPerm *perms,
+ char **realpath)
+{
+ proxy_path_iterator_t iter;
+ char *name, c;
+ int32_t err;
+
+ err = proxy_path_iterator_init(&iter, mount, path, perms,
+ realpath != NULL,
+ (flags & AT_SYMLINK_NOFOLLOW) == 0);
+ if (err < 0) {
+ return err;
+ }
+
+ while ((err >= 0) &&
+ ((name = proxy_path_iterator_next(&iter)) != NULL)) {
+ c = *name; /* The tests below leave c == 0 for the components that
+ * must be skipped: an empty name, ".", and ".." when the walk
+ * is already at the root of the mount (so it cannot climb
+ * above it). */
+ if (c == '.') {
+ c = name[1];
+ if ((c == '.') && (iter.base == mount->root)) {
+ c = name[2];
+ }
+ }
+ if (c == 0) {
+ continue;
+ }
+
+ err = proxy_path_iterator_lookup(&iter, name);
+ }
+
+ if (err >= 0) { /* Get the attributes requested by the caller for the entry reached. */
+ err = proxy_path_lookup(proxy_cmount(mount), iter.base, ".",
+ inode, stx, want, flags, iter.perms);
+ }
+
+ if ((err >= 0) && (realpath != NULL)) {
+ *realpath = iter.realpath;
+ iter.realpath = NULL; /* Ownership moved to the caller: keep destroy from freeing it. */
+ }
+
+ proxy_path_iterator_destroy(&iter);
+
+ return err;
+}
+
+/* Opens the configuration file `config` for reading and stores its current
+ * attributes in `st`. Only regular files are accepted.
+ *
+ * Returns the open descriptor, or the value of proxy_log() on failure (the
+ * descriptor is closed in that case). */
+static int32_t proxy_config_source_prepare(const char *config, struct stat *st)
+{
+	int32_t fd, err;
+
+	fd = open(config, O_RDONLY);
+	if (fd < 0) {
+		return proxy_log(LOG_ERR, errno, "open() failed");
+	}
+
+	if (fstat(fd, st) < 0) {
+		err = proxy_log(LOG_ERR, errno, "fstat() failed");
+	} else if (!S_ISREG(st->st_mode)) {
+		err = proxy_log(LOG_ERR, EINVAL,
+				"Configuration file is not a regular file");
+	} else {
+		return fd;
+	}
+
+	close(fd);
+
+	return err;
+}
+
+/* Closes the descriptor of the configuration file being read. The result of
+ * close() is not checked. */
+static void proxy_config_source_close(int32_t fd)
+{
+	(void)close(fd);
+}
+
+/* Reads up to `size` bytes of the configuration file into `buffer`.
+ *
+ * Returns the number of bytes read (0 at end of file), or the value of
+ * proxy_log() if read() fails. */
+static int32_t proxy_config_source_read(int32_t fd, void *buffer, size_t size)
+{
+	ssize_t count;
+
+	count = read(fd, buffer, size);
+	if (count >= 0) {
+		return count;
+	}
+
+	return proxy_log(LOG_ERR, errno, "read() failed");
+}
+
+/* Checks that the configuration file didn't change while it was being copied.
+ *
+ * The attributes taken before the copy (`before`) are compared with the
+ * current ones, and the size with the number of bytes copied (`size`).
+ *
+ * Returns 1 if everything matches, 0 (after logging a warning) if a difference
+ * is found, or the value of proxy_log() if fstat() fails. */
+static int32_t proxy_config_source_validate(int32_t fd, struct stat *before,
+					    int32_t size)
+{
+	struct stat after;
+	bool same;
+
+	if (fstat(fd, &after) < 0) {
+		return proxy_log(LOG_ERR, errno, "fstat() failed");
+	}
+
+	same = (before->st_size == size) &&
+	       (before->st_size == after.st_size) &&
+	       (before->st_blocks == after.st_blocks) &&
+	       (before->st_ctim.tv_sec == after.st_ctim.tv_sec) &&
+	       (before->st_ctim.tv_nsec == after.st_ctim.tv_nsec) &&
+	       (before->st_mtim.tv_sec == after.st_mtim.tv_sec) &&
+	       (before->st_mtim.tv_nsec == after.st_mtim.tv_nsec);
+	if (same) {
+		return 1;
+	}
+
+	proxy_log(LOG_WARN, 0,
+		  "Configuration file has been modified while "
+		  "reading it");
+
+	return 0;
+}
+
+/* Creates an unnamed temporary file (O_TMPFILE) in the current directory to
+ * hold the private copy of a configuration.
+ *
+ * Returns its descriptor, or the value of proxy_log() if openat() fails. */
+static int32_t proxy_config_destination_prepare(void)
+{
+	int32_t fd = openat(AT_FDCWD, ".", O_TMPFILE | O_WRONLY, 0600);
+
+	return (fd < 0) ? proxy_log(LOG_ERR, errno, "openat() failed") : fd;
+}
+
+/* Closes the descriptor of the private copy of the configuration. The result
+ * of close() is not checked. */
+static void proxy_config_destination_close(int32_t fd)
+{
+	(void)close(fd);
+}
+
+/* Writes `size` bytes from `data` to the private copy of the configuration.
+ * A short write is reported as ENOSPC instead of being retried.
+ *
+ * Returns `size` on success or the value of proxy_log() on failure. */
+static int32_t proxy_config_destination_write(int32_t fd, void *data,
+					      size_t size)
+{
+	ssize_t written;
+
+	written = write(fd, data, size);
+	if (written < 0) {
+		return proxy_log(LOG_ERR, errno, "write() failed");
+	}
+
+	if (written == size) {
+		return size;
+	}
+
+	return proxy_log(LOG_ERR, ENOSPC, "Partial write");
+}
+
+/* Flushes the private copy of the configuration and gives it the name `name`
+ * in the current directory.
+ *
+ * The unnamed file is linked directly through its descriptor. If that isn't
+ * permitted, it's linked through its /proc/self/fd entry instead. Finding that
+ * `name` already exists is not an error.
+ *
+ * Returns 0 on success or the value of proxy_log() on failure. */
+static int32_t proxy_config_destination_commit(int32_t fd, const char *name)
+{
+	char path[32];
+
+	if (fsync(fd) < 0) {
+		return proxy_log(LOG_ERR, errno, "fsync() failed");
+	}
+
+	if (linkat(fd, "", AT_FDCWD, name, AT_EMPTY_PATH) >= 0) {
+		/* Linked at the first attempt: no fallback is needed. */
+		return 0;
+	}
+	if (errno == EEXIST) {
+		return 0;
+	}
+
+	/* This may fail if the user doesn't have CAP_DAC_READ_SEARCH. In this
+	 * case we attempt to link it using the /proc filesystem. */
+	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
+	if (linkat(AT_FDCWD, path, AT_FDCWD, name, AT_SYMLINK_FOLLOW) < 0) {
+		if (errno != EEXIST) {
+			return proxy_log(LOG_ERR, errno, "linkat() failed");
+		}
+	}
+
+	return 0;
+}
+
+/* Data provider used while hashing a configuration file: reads the next chunk
+ * of the source, writes it to the private copy and hands it over through
+ * `*ptr`. `data` is the proxy_config_t describing the copy; `idx` is unused.
+ *
+ * Returns the size of the chunk, 0 at the end of the file, or an error (< 0)
+ * from the read or the write. */
+static int32_t proxy_config_transfer(void **ptr, void *data, int32_t idx)
+{
+	proxy_config_t *cfg = data;
+	int32_t count, res;
+
+	count = proxy_config_source_read(cfg->src, cfg->buffer, cfg->size);
+	if (count > 0) {
+		res = proxy_config_destination_write(cfg->dst, cfg->buffer,
+						     count);
+		if (res < 0) {
+			return res;
+		}
+
+		cfg->total += count;
+		*ptr = cfg->buffer;
+	}
+
+	return count;
+}
+
+/* Copies and checksums a given configuration to a file and makes sure that it
+ * has not been modified.
+ *
+ * `config` is the file to copy. The name given to the copy, "ceph-<hash>.conf",
+ * is stored in `path`, a buffer of `size` bytes.
+ *
+ * Returns the result of proxy_config_destination_commit() on success, or an
+ * error (< 0). */
+static int32_t proxy_config_prepare(const char *config, char *path,
+ int32_t size)
+{
+ char hash[65]; /* presumably 64 hex digits plus the terminator; confirm against proxy_hash_hex() */
+ proxy_config_t cfg;
+ struct stat before;
+ int32_t err;
+
+ cfg.size = 4096; /* Size of the buffer used by proxy_config_transfer(). */
+ cfg.buffer = proxy_malloc(cfg.size);
+ if (cfg.buffer == NULL) {
+ return -ENOMEM;
+ }
+ cfg.total = 0;
+
+ cfg.src = proxy_config_source_prepare(config, &before);
+ if (cfg.src < 0) {
+ err = cfg.src;
+ goto done_mem;
+ }
+
+ cfg.dst = proxy_config_destination_prepare();
+ if (cfg.dst < 0) {
+ err = cfg.dst;
+ goto done_src;
+ }
+
+ err = proxy_hash_hex(hash, sizeof(hash), proxy_config_transfer, &cfg); /* The file is copied while it's being hashed. */
+ if (err < 0) {
+ goto done_dst;
+ }
+
+ err = proxy_config_source_validate(cfg.src, &before, cfg.total);
+ if (err < 0) { /* NOTE(review): proxy_config_source_validate() returns 0 when it
+ * detects a modification, which passes this check; only its
+ * failures stop the copy. Confirm this is intended. */
+ goto done_dst;
+ }
+
+ err = snprintf(path, size, "ceph-%s.conf", hash);
+ if (err < 0) {
+ err = proxy_log(LOG_ERR, errno, "snprintf() failed");
+ goto done_dst;
+ }
+ if (err >= size) { /* The name was truncated. */
+ err = proxy_log(LOG_ERR, ENOBUFS,
+ "Insufficient space to store the name");
+ goto done_dst;
+ }
+
+ err = proxy_config_destination_commit(cfg.dst, path);
+
+done_dst:
+ proxy_config_destination_close(cfg.dst);
+
+done_src:
+ proxy_config_source_close(cfg.src);
+
+done_mem:
+ proxy_free(cfg.buffer);
+
+ return err;
+}
+
+/* Records a configuration action at the end of the list of changes of the
+ * instance.
+ *
+ * The arguments are stored one after the other, each with its terminator. A
+ * NULL `arg2` is recorded as "<null>" and a NULL `arg3` is not stored at all.
+ *
+ * Returns 0 on success or -ENOMEM. */
+static int32_t proxy_instance_change_add(proxy_instance_t *instance,
+					 const char *arg1, const char *arg2,
+					 const char *arg3)
+{
+	proxy_change_t *change;
+	char *dst;
+	int32_t n1, n2, n3, total;
+
+	if (arg2 == NULL) {
+		arg2 = "<null>";
+	}
+
+	n1 = strlen(arg1) + 1;
+	n2 = strlen(arg2) + 1;
+	n3 = 0;
+	if (arg3 != NULL) {
+		n3 = strlen(arg3) + 1;
+	}
+	total = n1 + n2 + n3;
+
+	change = proxy_malloc(sizeof(proxy_change_t) + total);
+	if (change == NULL) {
+		return -ENOMEM;
+	}
+	change->size = total;
+
+	dst = change->data;
+	memcpy(dst, arg1, n1);
+	dst += n1;
+	memcpy(dst, arg2, n2);
+	dst += n2;
+	if (arg3 != NULL) {
+		memcpy(dst, arg3, n3);
+	}
+
+	list_add_tail(&change->list, &instance->changes);
+
+	return 0;
+}
+
+/* Discards the most recently recorded change of the instance. The list of
+ * changes must not be empty. */
+static void proxy_instance_change_del(proxy_instance_t *instance)
+{
+	proxy_change_t *last;
+
+	last = list_last_entry(&instance->changes, proxy_change_t, list);
+
+	list_del(&last->list);
+	proxy_free(last);
+}
+
+/* Destroys a Ceph client instance: unmounts it if it's mounted, releases the
+ * libcephfs handle if there is one, and frees the recorded changes and the
+ * instance itself. */
+static void proxy_instance_destroy(proxy_instance_t *instance)
+{
+	if (instance->mounted) {
+		ceph_unmount(instance->cmount);
+	}
+
+	if (instance->cmount != NULL) {
+		ceph_release(instance->cmount);
+	}
+
+	for (; !list_empty(&instance->changes);) {
+		proxy_instance_change_del(instance);
+	}
+
+	proxy_free(instance);
+}
+
+/* Creates a new, unmounted Ceph client instance for the client `id`. The id
+ * is recorded as the first change of the instance.
+ *
+ * On success the instance is stored in `*pinstance` and 0 is returned. On
+ * failure everything is released and an error (< 0) is returned. */
+static int32_t proxy_instance_create(proxy_instance_t **pinstance,
+				     const char *id)
+{
+	struct ceph_mount_info *cmount;
+	proxy_instance_t *instance;
+	int32_t err;
+
+	instance = proxy_malloc(sizeof(proxy_instance_t));
+	if (instance == NULL) {
+		return -ENOMEM;
+	}
+
+	instance->cmount = NULL;
+	instance->inited = false;
+	instance->mounted = false;
+	list_init(&instance->siblings);
+	list_init(&instance->changes);
+
+	err = proxy_instance_change_add(instance, "id", id, NULL);
+	if (err >= 0) {
+		err = ceph_create(&cmount, id);
+		if (err >= 0) {
+			instance->cmount = cmount;
+			*pinstance = instance;
+
+			return 0;
+		}
+		proxy_log(LOG_ERR, -err, "ceph_create() failed");
+	}
+
+	proxy_instance_destroy(instance);
+
+	return err;
+}
+
+/* Destroys an instance that is not mounted.
+ *
+ * Returns 0 on success. A mounted instance is left untouched and the value of
+ * proxy_log() for EISCONN is returned. */
+static int32_t proxy_instance_release(proxy_instance_t *instance)
+{
+	if (!instance->mounted) {
+		proxy_instance_destroy(instance);
+
+		return 0;
+	}
+
+	return proxy_log(LOG_ERR, EISCONN,
+			 "Cannot release an active connection");
+}
+
+/* Assign a configuration file to the instance.
+ *
+ * When `config` is not NULL, a private copy of the file is made first and the
+ * copy is what libcephfs reads. When it's NULL, NULL is passed to
+ * ceph_conf_read_file(). In both cases the action is recorded in the list of
+ * changes of the instance, and removed again if reading fails.
+ *
+ * Returns the result of ceph_conf_read_file() or an error (< 0). A mounted
+ * instance cannot be configured. */
+static int32_t proxy_instance_config(proxy_instance_t *instance,
+				     const char *config)
+{
+	char path[128], *ppath;
+	int32_t err;
+
+	if (instance->mounted) {
+		return proxy_log(LOG_ERR, EISCONN,
+				 "Cannot configure a mounted instance");
+	}
+
+	ppath = NULL;
+	if (config != NULL) {
+		err = proxy_config_prepare(config, path, sizeof(path));
+		if (err < 0) {
+			return err;
+		}
+		ppath = path;
+	}
+
+	err = proxy_instance_change_add(instance, "conf", ppath, NULL);
+	if (err < 0) {
+		return err;
+	}
+
+	err = ceph_conf_read_file(instance->cmount, ppath);
+	if (err < 0) {
+		/* Report the failure like the other configuration calls do. */
+		proxy_log(LOG_ERR, -err, "ceph_conf_read_file() failed");
+		proxy_instance_change_del(instance);
+	}
+
+	return err;
+}
+
+/* Reads the configuration option `name` of the instance into `value`, a
+ * buffer of `size` bytes. The option and the value obtained are recorded in
+ * the list of changes of the instance.
+ *
+ * Returns the result of ceph_conf_get() on success or an error (< 0). */
+static int32_t proxy_instance_option_get(proxy_instance_t *instance,
+					 const char *name, char *value,
+					 size_t size)
+{
+	int32_t res, err;
+
+	if (name == NULL) {
+		return proxy_log(LOG_ERR, EINVAL, "NULL option name");
+	}
+
+	res = ceph_conf_get(instance->cmount, name, value, size);
+	if (res < 0) {
+		return proxy_log(
+			LOG_ERR, -res,
+			"Failed to get configuration from a client instance");
+	}
+
+	err = proxy_instance_change_add(instance, "get", name, value);
+
+	return (err < 0) ? err : res;
+}
+
+/* Sets the configuration option `name` of the instance to `value`. The
+ * setting is recorded in the list of changes of the instance and removed
+ * again if libcephfs rejects it.
+ *
+ * Returns the result of ceph_conf_set() or an error (< 0). Neither argument
+ * can be NULL and the instance must not be mounted. */
+static int32_t proxy_instance_option_set(proxy_instance_t *instance,
+					 const char *name, const char *value)
+{
+	int32_t res;
+
+	if ((value == NULL) || (name == NULL)) {
+		return proxy_log(LOG_ERR, EINVAL, "NULL value or option name");
+	}
+
+	if (instance->mounted) {
+		return proxy_log(LOG_ERR, EISCONN,
+				 "Cannot configure a mounted instance");
+	}
+
+	res = proxy_instance_change_add(instance, "set", name, value);
+	if (res < 0) {
+		return res;
+	}
+
+	res = ceph_conf_set(instance->cmount, name, value);
+	if (res >= 0) {
+		return res;
+	}
+
+	proxy_log(LOG_ERR, -res, "Failed to configure a client instance");
+	proxy_instance_change_del(instance);
+
+	return res;
+}
+
+/* Selects the filesystem `fs` for the instance. The selection is recorded in
+ * the list of changes of the instance and removed again if libcephfs rejects
+ * it.
+ *
+ * Returns the result of ceph_select_filesystem() or an error (< 0). The
+ * instance must not be mounted. */
+static int32_t proxy_instance_select(proxy_instance_t *instance, const char *fs)
+{
+	int32_t res;
+
+	if (instance->mounted) {
+		return proxy_log(
+			LOG_ERR, EISCONN,
+			"Cannot select a filesystem on a mounted instance");
+	}
+
+	res = proxy_instance_change_add(instance, "fs", fs, NULL);
+	if (res < 0) {
+		return res;
+	}
+
+	res = ceph_select_filesystem(instance->cmount, fs);
+	if (res >= 0) {
+		return res;
+	}
+
+	proxy_log(LOG_ERR, -res,
+		  "Failed to select a filesystem on a client instance");
+	proxy_instance_change_del(instance);
+
+	return res;
+}
+
+/* Marks the instance as initialized without calling ceph_init(). Always
+ * returns 0. */
+static int32_t proxy_instance_init(proxy_instance_t *instance)
+{
+	/* ceph_init() starts several internal threads, but the instance may
+	 * never be mounted if its configuration matches the one of another
+	 * mounted instance. Since ceph_mount() calls ceph_init() itself when
+	 * needed, the real initialization is left to it to save resources. */
+	if (!(instance->mounted || instance->inited)) {
+		instance->inited = true;
+	}
+
+	return 0;
+}
+
+/* Data provider used to hash the configuration of an instance: each call
+ * hands over the data of the next recorded change through `*ptr`. `data` is
+ * the proxy_iter_t tracking the position; `idx` is unused.
+ *
+ * Returns the size of the data, or 0 when there are no more changes. */
+static int32_t proxy_instance_hash(void **ptr, void *data, int32_t idx)
+{
+	proxy_iter_t *cursor = data;
+	list_t *node = cursor->item;
+	proxy_change_t *change;
+
+	if (node == &cursor->instance->changes) {
+		return 0;
+	}
+	cursor->item = node->next;
+
+	change = list_entry(node, proxy_change_t, list);
+	*ptr = change->data;
+
+	return change->size;
+}
+
+/* Check if an existing instance matches the configuration used for the current
+ * one. If so, share the mount. Otherwise, create a new mount.
+ *
+ * The comparison is based on a hash of all the changes recorded in the
+ * instance. When a mounted instance with the same hash exists, `*pinstance` is
+ * replaced by it and the instance received is kept in its list of siblings.
+ *
+ * Returns 0 on success, otherwise the error produced by proxy_hash() or
+ * proxy_log(). */
+static int32_t proxy_instance_mount(proxy_instance_t **pinstance)
+{
+ proxy_instance_t *instance, *existing;
+ proxy_iter_t iter;
+ list_t *list;
+ int32_t err;
+
+ instance = *pinstance;
+
+ if (instance->mounted) {
+ return proxy_log(LOG_ERR, EISCONN,
+ "Cannot mount and already mounted instance");
+ }
+
+ iter.instance = instance;
+ iter.item = instance->changes.next;
+
+ /* Create a hash that includes all settings. */
+ err = proxy_hash(instance->hash, sizeof(instance->hash),
+ proxy_instance_hash, &iter);
+ if (err < 0) {
+ return err;
+ }
+
+ list = &instance_pool.hash[instance->hash[0]]; /* Bucket selected by the first element of the hash. */
+
+ proxy_mutex_lock(&instance_pool.mutex);
+
+ if (list->next == NULL) { /* Bucket never used: the pool starts zero-filled, so set it up now. */
+ list_init(list);
+ } else {
+ list_for_each_entry(existing, list, list) {
+ if (memcmp(existing->hash, instance->hash, 32) == 0) {
+ /* A match has been found. Instead of destroying
+ * the current instance, it's stored as a
+ * sibling of the one found. It will be
+ * reassigned to an instance when someone
+ * unmounts. */
+ list_add(&instance->list, &existing->siblings);
+ goto found;
+ }
+ }
+ }
+
+ /* No matching instance has been found. Just create a new one. The root
+ * is always "/". Each virtual mount point will locally store its root
+ * path. */
+ err = ceph_mount(instance->cmount, "/");
+ if (err >= 0) {
+ err = ceph_ll_lookup_root(instance->cmount, &instance->root);
+ if (err >= 0) {
+ instance->inited = true;
+ instance->mounted = true;
+ list_add(&instance->list, list);
+ } else {
+ ceph_unmount(instance->cmount);
+ }
+ }
+
+ existing = NULL; /* Nothing is shared; this also discards the cursor left by the search above. */
+
+found:
+ proxy_mutex_unlock(&instance_pool.mutex);
+
+ if (err < 0) { /* NOTE(review): a failure of ceph_ll_lookup_root() is reported with this message too. */
+ return proxy_log(LOG_ERR, -err, "ceph_mount() failed");
+ }
+
+ if (existing != NULL) {
+ proxy_log(LOG_INFO, 0, "Shared a client instance (%p)",
+ existing);
+ *pinstance = existing;
+ } else {
+ proxy_log(LOG_INFO, 0, "Created a new client instance (%p)",
+ instance);
+ }
+
+ return 0;
+}
+
+/* Detaches a mount from the mounted instance it's using.
+ *
+ * If other mounts share the instance, it's kept mounted and `*pinstance` is
+ * replaced by one of its saved siblings, which has the same configuration but
+ * is not mounted. Otherwise the instance is removed from the pool, its root
+ * inode is released and it's unmounted.
+ *
+ * Returns 0 on success or the value of proxy_log() on failure. */
+static int32_t proxy_instance_unmount(proxy_instance_t **pinstance)
+{
+	proxy_instance_t *instance = *pinstance;
+	proxy_instance_t *sibling = NULL;
+	int32_t err;
+
+	if (!instance->mounted) {
+		return proxy_log(LOG_ERR, ENOTCONN,
+				 "Cannot unmount an already unmount instance");
+	}
+
+	proxy_mutex_lock(&instance_pool.mutex);
+
+	if (!list_empty(&instance->siblings)) {
+		/* Still shared: hand one of the saved siblings to this mount. */
+		sibling = list_first_entry(&instance->siblings,
+					   proxy_instance_t, list);
+		list_del_init(&sibling->list);
+	} else {
+		/* Last user: take the instance out of the pool. */
+		list_del(&instance->list);
+		instance->mounted = false;
+	}
+
+	proxy_mutex_unlock(&instance_pool.mutex);
+
+	if (sibling != NULL) {
+		*pinstance = sibling;
+
+		return 0;
+	}
+
+	ceph_ll_put(instance->cmount, instance->root);
+
+	err = ceph_unmount(instance->cmount);
+	if (err < 0) {
+		return proxy_log(LOG_ERR, -err, "ceph_unmount() failed");
+	}
+
+	return 0;
+}
+
+/* Creates a mount object backed by a new client instance for the client
+ * `id`.
+ *
+ * On success the mount is stored in `*pmount` and 0 is returned. Otherwise an
+ * error (< 0) is returned and nothing is left allocated. */
+int32_t proxy_mount_create(proxy_mount_t **pmount, const char *id)
+{
+	proxy_mount_t *mount;
+	int32_t err;
+
+	mount = proxy_malloc(sizeof(proxy_mount_t));
+	if (mount == NULL) {
+		return -ENOMEM;
+	}
+	mount->root = NULL;
+
+	err = proxy_instance_create(&mount->instance, id);
+	if (err >= 0) {
+		*pmount = mount;
+
+		return 0;
+	}
+
+	proxy_free(mount);
+
+	return err;
+}
+
+/* Assigns the configuration file `config` to the instance used by `mount`.
+ * See proxy_instance_config() for the result. */
+int32_t proxy_mount_config(proxy_mount_t *mount, const char *config)
+{
+	proxy_instance_t *instance = mount->instance;
+
+	return proxy_instance_config(instance, config);
+}
+
+/* Sets the option `name` to `value` in the instance used by `mount`. See
+ * proxy_instance_option_set() for the result. */
+int32_t proxy_mount_set(proxy_mount_t *mount, const char *name,
+			const char *value)
+{
+	proxy_instance_t *instance = mount->instance;
+
+	return proxy_instance_option_set(instance, name, value);
+}
+
+/* Reads the option `name` of the instance used by `mount` into `value`, a
+ * buffer of `size` bytes. See proxy_instance_option_get() for the result. */
+int32_t proxy_mount_get(proxy_mount_t *mount, const char *name, char *value,
+			size_t size)
+{
+	proxy_instance_t *instance = mount->instance;
+
+	return proxy_instance_option_get(instance, name, value, size);
+}
+
+/* Selects the filesystem `fs` in the instance used by `mount`. See
+ * proxy_instance_select() for the result. */
+int32_t proxy_mount_select(proxy_mount_t *mount, const char *fs)
+{
+	proxy_instance_t *instance = mount->instance;
+
+	return proxy_instance_select(instance, fs);
+}
+
+/* Initializes the instance used by `mount`. See proxy_instance_init() for
+ * the result. */
+int32_t proxy_mount_init(proxy_mount_t *mount)
+{
+	proxy_instance_t *instance = mount->instance;
+
+	return proxy_instance_init(instance);
+}
+
+/* Mounts `mount`, using `root` (or "/" when it's NULL) as its root directory.
+ *
+ * The client instance is mounted or shared first. Then `root` is resolved
+ * inside it and must be a directory. On success both the root and the current
+ * directory of the mount refer to it, and the cwd path is "/".
+ *
+ * Returns 0 on success. On failure the instance is unmounted again and an
+ * error (< 0) is returned. */
+int32_t proxy_mount_mount(proxy_mount_t *mount, const char *root)
+{
+	struct ceph_statx stx;
+	struct ceph_mount_info *cmount;
+	const char *target;
+	int32_t err;
+
+	err = proxy_instance_mount(&mount->instance);
+	if (err < 0) {
+		return err;
+	}
+
+	cmount = proxy_cmount(mount);
+	mount->perms = ceph_mount_perms(cmount);
+
+	target = (root != NULL) ? root : "/";
+
+	/* proxy_path_resolve() needs a root and a cwd to work: temporarily
+	 * point both to the root of the instance. */
+	mount->root = mount->instance->root;
+	mount->root_ino = CEPH_INO_ROOT;
+	mount->cwd = mount->instance->root;
+	mount->cwd_ino = CEPH_INO_ROOT;
+
+	/* Find the directory requested as the root of the mount. */
+	err = proxy_path_resolve(mount, target, &mount->root, &stx,
+				 CEPH_STATX_ALL_STATS, 0, mount->perms, NULL);
+	if (err < 0) {
+		goto failed;
+	}
+
+	if (!S_ISDIR(stx.stx_mode)) {
+		err = proxy_log(LOG_ERR, ENOTDIR,
+				"The root path is not a directory");
+		goto failed_root;
+	}
+
+	mount->cwd_path = proxy_strdup("/");
+	if (mount->cwd_path == NULL) {
+		err = -ENOMEM;
+		goto failed_root;
+	}
+	mount->cwd_path_len = 1;
+
+	mount->root_ino = stx.stx_ino;
+
+	/* The cwd refers to the same inode: it needs its own reference. */
+	err = proxy_inode_ref(mount, stx.stx_ino);
+	if (err < 0) {
+		goto failed_path;
+	}
+
+	mount->cwd = mount->root;
+	mount->cwd_ino = stx.stx_ino;
+
+	return 0;
+
+failed_path:
+	proxy_free(mount->cwd_path);
+
+failed_root:
+	ceph_ll_put(proxy_cmount(mount), mount->root);
+
+failed:
+	proxy_instance_unmount(&mount->instance);
+
+	return err;
+}
+
+/* Unmounts `mount`: releases the references on its root and current
+ * directories, frees the cwd path and detaches it from the mounted instance.
+ *
+ * All the fields released are reset, the cwd path included, so that nothing
+ * is left pointing to freed data.
+ *
+ * Returns the result of proxy_instance_unmount(). */
+int32_t proxy_mount_unmount(proxy_mount_t *mount)
+{
+	ceph_ll_put(proxy_cmount(mount), mount->root);
+	mount->root = NULL;
+	mount->root_ino = 0;
+
+	ceph_ll_put(proxy_cmount(mount), mount->cwd);
+	mount->cwd = NULL;
+	mount->cwd_ino = 0;
+
+	proxy_free(mount->cwd_path);
+	mount->cwd_path = NULL;
+	mount->cwd_path_len = 0;
+
+	return proxy_instance_unmount(&mount->instance);
+}
+
+/* Releases the instance used by `mount` and, if that succeeds, the mount
+ * object itself.
+ *
+ * Returns the result of proxy_instance_release(). On error the mount is left
+ * untouched. */
+int32_t proxy_mount_release(proxy_mount_t *mount)
+{
+	int32_t err = proxy_instance_release(mount->instance);
+
+	if (err < 0) {
+		return err;
+	}
+
+	proxy_free(mount);
+
+	return err;
+}