summaryrefslogtreecommitdiffstats
path: root/src/crush
diff options
context:
space:
mode:
authorLoic Dachary <ldachary@redhat.com>2017-01-31 18:33:37 +0100
committerLoic Dachary <ldachary@redhat.com>2017-01-31 18:33:37 +0100
commit08bb5c5b488b693bc6cd56170726df889430333d (patch)
tree31e7d76444eeef633dfbcbabb63d5bd77a508eaf /src/crush
parentMerge pull request #13164 from dzafman/wip-18718 (diff)
downloadceph-08bb5c5b488b693bc6cd56170726df889430333d.tar.xz
ceph-08bb5c5b488b693bc6cd56170726df889430333d.zip
crush: API documentation
Signed-off-by: Loic Dachary <loic@dachary.org>
Diffstat (limited to 'src/crush')
-rw-r--r--src/crush/builder.h211
-rw-r--r--src/crush/crush.h243
-rw-r--r--src/crush/mapper.h60
3 files changed, 464 insertions, 50 deletions
diff --git a/src/crush/builder.h b/src/crush/builder.h
index b436e3eb3cd..4424e1c21c9 100644
--- a/src/crush/builder.h
+++ b/src/crush/builder.h
@@ -3,24 +3,235 @@
#include "crush.h"
+/** @ingroup API
+ *
+ * Allocate a crush_map with __malloc(3)__ and initialize it. The
+ * caller is responsible for deallocating the crush_map with
+ * crush_destroy().
+ *
+ * The content of the allocated crush_map is undefined and
+ * it is the responsibility of the caller to set meaningful values.
+ *
+ * The recommended values are:
+ *
+ * struct crush_map *m = crush_create();
+ * m->choose_local_tries = 0;
+ * m->choose_local_fallback_tries = 0;
+ * m->choose_total_tries = 50;
+ * m->chooseleaf_descend_once = 1;
+ * m->chooseleaf_vary_r = 1;
+ * m->chooseleaf_stable = 1;
+ * m->allowed_bucket_algs =
+ * (1 << CRUSH_BUCKET_UNIFORM) |
+ * (1 << CRUSH_BUCKET_LIST) |
+ * (1 << CRUSH_BUCKET_STRAW2);
+ *
+ * @returns a pointer to the newly created crush_map or NULL
+ */
extern struct crush_map *crush_create();
+/** @ingroup API
+ *
+ * Analyze the content of __map__ and set the internal values required
+ * before it can be used to map values with crush_do_rule(). The caller
+ * must make sure it is run before crush_do_rule() and after any
+ * function that modifies the __map__ (crush_add_bucket(), etc.).
+ *
+ * @param map the crush_map
+ */
extern void crush_finalize(struct crush_map *map);
/* rules */
+/** @ingroup API
+ *
+ * Allocate an empty crush_rule structure large enough to store __len__ steps.
+ * Steps can be added to a rule via crush_rule_set_step(). The __ruleset__
+ * is a user defined integer, not used by __libcrush__ and stored in
+ * the allocated rule at __rule->ruleset__.
+ *
+ * The rule is designed to allow crush_do_rule() to get at least __minsize__ items
+ * and at most __maxsize__ items.
+ *
+ * The __type__ is defined by the caller and will be used by
+ * crush_find_rule() when looking for a rule and by
+ * __CRUSH_RULE_CHOOSE*__ steps when looking for items.
+ *
+ * If __malloc(3)__ fails, return NULL.
+ *
+ * @param len number of steps in the rule
+ * @param ruleset user defined value
+ * @param type user defined value
+ * @param minsize minimum number of items the rule can map
+ * @param maxsize maximum number of items the rule can map
+ *
+ * @returns a pointer to the newly created rule or NULL
+ */
extern struct crush_rule *crush_make_rule(int len, int ruleset, int type, int minsize, int maxsize);
+/** @ingroup API
+ *
+ * Set the __pos__ step of the __rule__ to an operand and up to two arguments.
+ * The value of the operand __op__ determines if the arguments are used and how:
+ *
+ * - __CRUSH_RULE_NOOP__ do nothing.
+ * - __CRUSH_RULE_TAKE__ select the __arg1__ item
+ * - __CRUSH_RULE_CHOOSE_FIRSTN__ and __CRUSH_RULE_CHOOSE_INDEP__
+ * recursively explore each bucket currently selected, looking for
+ * __arg1__ items of type __arg2__ and select them.
+ * - __CRUSH_RULE_CHOOSELEAF_FIRSTN__ and __CRUSH_RULE_CHOOSELEAF_INDEP__
+ * recursively explore each bucket currently selected, looking for
+ * __arg1__ leaves within all the buckets of type __arg2__ and
+ * select them.
+ * - __CRUSH_RULE_EMIT__ append the selection to the results and clear
+ * the selection
+ *
+ * In all __CHOOSE__ steps, if __arg1__ is zero, the number of items
+ * to select is determined by the __max_result__ argument of
+ * crush_do_rule(), i.e. __arg1__ is __max_result__ minus the number of
+ * items already in the result.
+ *
+ * @param rule the rule in which the step is inserted
+ * @param pos the zero based step index
+ * @param op one of __CRUSH_RULE_NOOP__, __CRUSH_RULE_TAKE__, __CRUSH_RULE_CHOOSE_FIRSTN__, __CRUSH_RULE_CHOOSE_INDEP__, __CRUSH_RULE_CHOOSELEAF_FIRSTN__, __CRUSH_RULE_CHOOSELEAF_INDEP__ or __CRUSH_RULE_EMIT__
+ * @param arg1 first argument for __op__
+ * @param arg2 second argument for __op__
+ */
extern void crush_rule_set_step(struct crush_rule *rule, int pos, int op, int arg1, int arg2);
+/** @ingroup API
+ *
+ * Add the __rule__ into the crush __map__ and assign it the
+ * __ruleno__ unique identifier. If __ruleno__ is -1, the function will
+ * assign the lowest available identifier. The __ruleno__ value must be
+ * a positive integer lower than __CRUSH_MAX_RULES__.
+ *
+ * - return -ENOSPC if the rule identifier is >= __CRUSH_MAX_RULES__
+ * - return -ENOMEM if __realloc(3)__ fails to expand the array of
+ * rules in the __map__
+ *
+ * @param map the crush_map
+ * @param rule the rule to add to the __map__
+ * @param ruleno a positive integer < __CRUSH_MAX_RULES__ or -1
+ *
+ * @returns the rule unique identifier on success, < 0 on error
+ */
extern int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno);
/* buckets */
extern int crush_get_next_bucket_id(struct crush_map *map);
+/** @ingroup API
+ *
+ * Add __bucket__ into the crush __map__ and assign it the
+ * __bucketno__ unique identifier. If __bucketno__ is -1, the function
+ * will assign the lowest available identifier. The bucket identifier
+ * must be a negative integer. The bucket identifier is returned via
+ * __idout__.
+ *
+ * - return -ENOMEM if __realloc(3)__ fails to expand the array of
+ * buckets in the __map__
+ * - return -EEXIST if the __bucketno__ identifier is already assigned
+ * to another bucket.
+ *
+ * @param[in] map the crush_map
+ * @param[in] bucketno the bucket unique identifer or -1
+ * @param[in] bucket the bucket to add to the __map__
+ * @param[out] idout a pointer to the bucket identifier
+ *
+ * @returns 0 on success, < 0 on error
+ */
extern int crush_add_bucket(struct crush_map *map,
int bucketno,
struct crush_bucket *bucket, int *idout);
+/** @ingroup API
+ *
+ * Allocate a crush_bucket with __malloc(3)__ and initialize it. The
+ * content of the bucket is filled with __size__ items from
+ * __items__. The item selection is set to use __alg__ which is one of
+ * ::CRUSH_BUCKET_UNIFORM , ::CRUSH_BUCKET_LIST or
+ * ::CRUSH_BUCKET_STRAW2. The initial __items__ are assigned a
+ * weight from the __weights__ array, depending on the value of
+ * __alg__. If __alg__ is ::CRUSH_BUCKET_UNIFORM, all items are set
+ * to have a weight equal to __weights[0]__, otherwise the weight of
+ * __items[x]__ is set to be the value of __weights[x]__.
+ *
+ * @param map __unused__
+ * @param alg algorithm for item selection
+ * @param hash always set to CRUSH_HASH_RJENKINS1
+ * @param type user defined bucket type
+ * @param size of the __items__ array
+ * @param items array of __size__ items
+ * @param weights the weight of each item in __items__, depending on __alg__
+ */
struct crush_bucket *crush_make_bucket(struct crush_map *map, int alg, int hash, int type, int size, int *items, int *weights);
+/** @ingroup API
+ *
+ * Add __item__ to __bucket__ with __weight__. The weight of the new
+ * item is added to the weight of the bucket so that it reflects
+ * the total weight of all items.
+ *
+ * If __bucket->alg__ is ::CRUSH_BUCKET_UNIFORM, the value of __weight__ must be equal to
+ * __(struct crush_bucket_uniform *)bucket->item_weight__.
+ *
+ * - return -ENOMEN if the __bucket__ cannot be resized with __realloc(3)__.
+ * - return -ERANGE if adding __weight__ to the weight of the bucket overflows.
+ * - return -1 if the value of __bucket->alg__ is unknown.
+ *
+ * @returns 0 on success, < 0 on error
+ */
extern int crush_bucket_add_item(struct crush_map *map, struct crush_bucket *bucket, int item, int weight);
+/** @ingroup API
+ *
+ * If __bucket->alg__ is ::CRUSH_BUCKET_UNIFORM,
+ * __(struct crush_bucket_uniform *)bucket->item_weight__ is set to __weight__ and the
+ * weight of the bucket is set to be the number of items in the bucket times the weight.
+ * The return value is the difference between the new bucket weight and the former
+ * bucket weight. The __item__ argument is ignored.
+ *
+ * If __bucket->alg__ is different from ::CRUSH_BUCKET_UNIFORM,
+ * set the __weight__ of __item__ in __bucket__. The former weight of the
+ * item is subtracted from the weight of of the bucket and the new weight is added.
+ * The return value is the difference between the new item weight and the former
+ * item weight.
+ *
+ * @returns the difference between the new weight and the former weight
+ */
extern int crush_bucket_adjust_item_weight(struct crush_map *map, struct crush_bucket *bucket, int item, int weight);
+/** @ingroup API
+ *
+ * Recursively update the weight of __bucket__ and its children, deep
+ * first. The __bucket__ weight is set to the sum of the weight of the
+ * items it contains.
+ *
+ * - return -ERANGE if the sum of the weight of the items in __bucket__ overflows.
+ * - return -1 if the value of __bucket->alg__ is unknown.
+ *
+ * @param crush a crush_map containing __bucket__
+ * @param bucket the root of the tree to reweight
+ * @returns 0 on success, < 0 on error
+ */
extern int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *bucket);
+/** @ingroup API
+ *
+ * Remove __bucket__ from __map__ and deallocate it via crush_destroy_bucket().
+ * __assert(3)__ that __bucket__ is in __map__. The caller is responsible for
+ * making sure the bucket is not the child of any other bucket in the __map__.
+ *
+ * @param map a crush_map containing __bucket__
+ * @param bucket the bucket to remove from __map__
+ * @returns 0
+ */
extern int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket);
+/** @ingroup API
+ *
+ * Remove __item__ from __bucket__ and subtract the item weight from
+ * the bucket weight. If the weight of the item is greater than the
+ * weight of the bucket, silentely set the bucket weight to zero.
+ *
+ * - return -ENOMEN if the __bucket__ cannot be sized down with __realloc(3)__.
+ * - return -1 if the value of __bucket->alg__ is unknown.
+ *
+ * @param map __unused__
+ * @param bucket the bucket from which __item__ is removed
+ * @param item the item to remove from __bucket__
+ * @returns 0 on success, < 0 on error
+ */
extern int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *bucket, int item);
struct crush_bucket_uniform *
diff --git a/src/crush/crush.h b/src/crush/crush.h
index d2c235af690..234f81b6585 100644
--- a/src/crush/crush.h
+++ b/src/crush/crush.h
@@ -31,7 +31,10 @@
#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u)
#define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */
-#define CRUSH_ITEM_NONE 0x7fffffff /* no result */
+/** @ingroup API
+ * The equivalent of NULL for an item, i.e. the absence of an item.
+ */
+#define CRUSH_ITEM_NONE 0x7fffffff
/*
* CRUSH uses user-defined "rules" to describe how inputs should be
@@ -44,8 +47,11 @@ struct crush_rule_step {
__s32 arg2;
};
-/* step op codes */
-enum {
+/** @ingroup API
+ */
+enum crush_opcodes {
+ /*! do nothing
+ */
CRUSH_RULE_NOOP = 0,
CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
@@ -95,24 +101,92 @@ struct crush_rule {
/*
* A bucket is a named container of other items (either devices or
- * other buckets). Items within a bucket are chosen using one of a
- * few different algorithms. The table summarizes how the speed of
- * each option measures up against mapping stability when items are
- * added or removed.
+ * other buckets).
+ */
+
+/** @ingroup API
*
- * Bucket Alg Speed Additions Removals
- * ------------------------------------------------
- * uniform O(1) poor poor
- * list O(n) optimal poor
- * tree O(log n) good good
- * straw O(n) better better
- * straw2 O(n) optimal optimal
+ * Items within a bucket are chosen with crush_do_rule() using one of
+ * three algorithms representing a tradeoff between performance and
+ * reorganization efficiency. If you are unsure of which bucket type
+ * to use, we recommend using ::CRUSH_BUCKET_STRAW2.
+ *
+ * The table summarizes how the speed of each option measures up
+ * against mapping stability when items are added or removed.
+ *
+ * Bucket Alg Speed Additions Removals
+ * ------------------------------------------------
+ * uniform O(1) poor poor
+ * list O(n) optimal poor
+ * straw2 O(n) optimal optimal
*/
-enum {
+enum crush_algorithm {
+ /*!
+ * Devices are rarely added individually in a large system.
+ * Instead, new storage is typically deployed in blocks of identical
+ * devices, often as an additional shelf in a server rack or perhaps
+ * an entire cabinet. Devices reaching their end of life are often
+ * similarly decommissioned as a set (individual failures aside),
+ * making it natural to treat them as a unit. CRUSH uniform buckets
+ * are used to represent an identical set of devices in such
+ * circumstances. The key advantage in doing so is performance
+ * related: CRUSH can map replicas into uniform buckets in constant
+ * time. In cases where the uniformity restrictions are not
+ * appropriate, other bucket types can be used. If the size of a
+ * uniform bucket changes, there is a complete reshuffling of data
+ * between devices, much like conventional hash-based distribution
+ * strategies.
+ */
CRUSH_BUCKET_UNIFORM = 1,
+ /*!
+ * List buckets structure their contents as a linked list, and
+ * can contain items with arbitrary weights. To place a
+ * replica, CRUSH begins at the head of the list with the most
+ * recently added item and compares its weight to the sum of
+ * all remaining items’ weights. Depending on the value of
+ * hash( x , r , item), either the current item is chosen with
+ * the appropriate probability, or the process continues
+ * recursively down the list. This is a natural and intuitive
+ * choice for an expanding cluster: either an object is
+ * relocated to the newest device with some appropriate
+ * probability, or it remains on the older devices as before.
+ * The result is optimal data migration when items are added
+ * to the bucket. Items removed from the middle or tail of the
+ * list, however, can result in a significant amount of
+ * unnecessary movement, making list buckets most suitable for
+ * circumstances in which they never (or very rarely) shrink.
+ */
CRUSH_BUCKET_LIST = 2,
+ /*! @cond INTERNAL */
CRUSH_BUCKET_TREE = 3,
CRUSH_BUCKET_STRAW = 4,
+ /*! @endcond */
+ /*!
+ * List and tree buckets are structured such that a limited
+ * number of hash values need to be calculated and compared to
+ * weights in order to select a bucket item. In doing so,
+ * they divide and conquer in a way that either gives certain
+ * items precedence (e. g., those at the beginning of a list)
+ * or obviates the need to consider entire subtrees of items
+ * at all. That im- proves the performance of the replica
+ * placement process, but can also introduce suboptimal
+ * reorganization behavior when the contents of a bucket
+ * change due an addition, removal, or re-weighting of an
+ * item.
+ *
+ * The straw2 bucket type allows all items to fairly “compete”
+ * against each other for replica placement through a process
+ * analogous to a draw of straws. To place a replica, a straw
+ * of random length is drawn for each item in the bucket. The
+ * item with the longest straw wins. The length of each straw
+ * is initially a value in a fixed range. Each straw length
+ * is scaled by a factor based on the item’s weight so that
+ * heavily weighted items are more likely to win the draw.
+ * Although this process is almost twice as slow (on average)
+ * than a list bucket and even slower than a tree bucket
+ * (which scales logarithmically), straw2 buckets result in
+ * optimal data movement between nested items when modified.
+ */
CRUSH_BUCKET_STRAW2 = 5,
};
extern const char *crush_bucket_alg_name(int alg);
@@ -126,27 +200,66 @@ extern const char *crush_bucket_alg_name(int alg);
(1 << CRUSH_BUCKET_LIST) | \
(1 << CRUSH_BUCKET_STRAW))
+/** @ingroup API
+ *
+ * A bucket contains __size__ __items__ which are either positive
+ * numbers or negative numbers that reference other buckets and is
+ * uniquely identified with __id__ which is a negative number. The
+ * __weight__ of a bucket is the cumulative weight of all its
+ * children. A bucket is assigned a ::crush_algorithm that is used by
+ * crush_do_rule() to draw an item depending on its weight. A bucket
+ * can be assigned a strictly positive (> 0) __type__ defined by the
+ * caller. The __type__ can be used by crush_do_rule(), when it is
+ * given as an argument of a rule step.
+ *
+ * A pointer to crush_bucket can safely be cast into the following
+ * structure, depending on the value of __alg__:
+ *
+ * - __alg__ == ::CRUSH_BUCKET_UNIFORM cast to crush_bucket_uniform
+ * - __alg__ == ::CRUSH_BUCKET_LIST cast to crush_bucket_list
+ * - __alg__ == ::CRUSH_BUCKET_STRAW2 cast to crush_bucket_straw2
+ *
+ * The weight of each item depends on the algorithm and the
+ * information about it is available in the corresponding structure
+ * (crush_bucket_uniform, crush_bucket_list or crush_bucket_straw2).
+ *
+ * See crush_map for more information on how __id__ is used
+ * to reference the bucket.
+ */
struct crush_bucket {
- __s32 id; /* this'll be negative */
- __u16 type; /* non-zero; type=0 is reserved for devices */
- __u8 alg; /* one of CRUSH_BUCKET_* */
+ __s32 id; /*!< bucket identifier, < 0 and unique within a crush_map */
+ __u16 type; /*!< > 0 bucket type, defined by the caller */
+ __u8 alg; /*!< the item selection ::crush_algorithm */
+ /*! @cond INTERNAL */
__u8 hash; /* which hash function to use, CRUSH_HASH_* */
- __u32 weight; /* 16-bit fixed point */
- __u32 size; /* num items */
- __s32 *items;
-
+ /*! @endcond */
+ __u32 weight; /*!< 16.16 fixed point cumulated children weight */
+ __u32 size; /*!< size of the __items__ array */
+ __s32 *items; /*!< array of children: < 0 are buckets, >= 0 items */
};
+/** @ingroup API
+ * The weight of each item in the bucket when
+ * __h.alg__ == ::CRUSH_BUCKET_UNIFORM.
+ */
struct crush_bucket_uniform {
- struct crush_bucket h;
- __u32 item_weight; /* 16-bit fixed point; all items equally weighted */
+ struct crush_bucket h; /*!< generic bucket information */
+ __u32 item_weight; /*!< 16.16 fixed point weight for each item */
};
+/** @ingroup API
+ * The weight of each item in the bucket when
+ * __h.alg__ == ::CRUSH_BUCKET_LIST.
+ *
+ * The weight of __h.items[i]__ is __item_weights[i]__ for i in
+ * [0,__h.size__[. The __sum_weight__[i] is the sum of the __item_weights[j]__
+ * for j in [0,i[.
+ *
+ */
struct crush_bucket_list {
- struct crush_bucket h;
- __u32 *item_weights; /* 16-bit fixed point */
- __u32 *sum_weights; /* 16-bit fixed point. element i is sum
- of weights 0..i, inclusive */
+ struct crush_bucket h; /*!< generic bucket information */
+ __u32 *item_weights; /*!< 16.16 fixed point weight for each item */
+ __u32 *sum_weights; /*!< 16.16 fixed point sum of the weights */
};
struct crush_bucket_tree {
@@ -162,48 +275,71 @@ struct crush_bucket_straw {
__u32 *straws; /* 16-bit fixed point */
};
+/** @ingroup API
+ * The weight of each item in the bucket when
+ * __h.alg__ == ::CRUSH_BUCKET_STRAW2.
+ *
+ * The weight of __h.items[i]__ is __item_weights[i]__ for i in
+ * [0,__h.size__[.
+ */
struct crush_bucket_straw2 {
- struct crush_bucket h;
- __u32 *item_weights; /* 16-bit fixed point */
+ struct crush_bucket h; /*!< generic bucket information */
+ __u32 *item_weights; /*!< 16.16 fixed point weight for each item */
};
-/*
- * CRUSH map includes all buckets, rules, etc.
+/** @ingroup API
+ *
+ * A crush map define a hierarchy of crush_bucket that end with leaves
+ * (buckets and leaves are called items) and a set of crush_rule to
+ * map an integer to items with the crush_do_rule() function.
+ *
*/
struct crush_map {
+ /*! An array of crush_bucket pointers of size __max_buckets__.
+ * An element of the array may be NULL if the bucket was removed with
+ * crush_remove_bucket(). The buckets must be added with crush_add_bucket().
+ * The bucket found at __buckets[i]__ must have a crush_bucket.id == -1-i.
+ */
struct crush_bucket **buckets;
+ /*! An array of crush_rule pointers of size __max_rules__.
+ * An element of the array may be NULL if the rule was removed (there is
+ * no API to do so but there may be one in the future). The rules must be added
+ * with crush_add_rule().
+ */
struct crush_rule **rules;
-
- __s32 max_buckets;
- __u32 max_rules;
+ __s32 max_buckets; /*!< the size of __buckets__ */
+ __u32 max_rules; /*!< the size of __rules__ */
+ /*! The value of the highest item stored in the crush_map + 1
+ */
__s32 max_devices;
- /* choose local retries before re-descent */
+ /*! choose local retries before re-descent */
__u32 choose_local_tries;
- /* choose local attempts using a fallback permutation before
- * re-descent */
+ /*! choose local attempts using a fallback permutation before
+ *! re-descent */
__u32 choose_local_fallback_tries;
- /* choose attempts before giving up */
+ /*! choose attempts before giving up */
__u32 choose_total_tries;
- /* attempt chooseleaf inner descent once for firstn mode; on
- * reject retry outer descent. Note that this does *not*
- * apply to a collision: in that case we will retry as we used
- * to. */
+ /*! attempt chooseleaf inner descent once for firstn mode; on
+ *! reject retry outer descent. Note that this does *not*
+ *! apply to a collision: in that case we will retry as we used
+ *! to. */
__u32 chooseleaf_descend_once;
- /* if non-zero, feed r into chooseleaf, bit-shifted right by (r-1)
- * bits. a value of 1 is best for new clusters. for legacy clusters
- * that want to limit reshuffling, a value of 3 or 4 will make the
- * mappings line up a bit better with previous mappings. */
+ /*! if non-zero, feed r into chooseleaf, bit-shifted right by (r-1)
+ *! bits. a value of 1 is best for new clusters. for legacy clusters
+ *! that want to limit reshuffling, a value of 3 or 4 will make the
+ *! mappings line up a bit better with previous mappings. */
__u8 chooseleaf_vary_r;
- /* if true, it makes chooseleaf firstn to return stable results (if
- * no local retry) so that data migrations would be optimal when some
- * device fails. */
+ /*! if true, it makes chooseleaf firstn to return stable results (if
+ *! no local retry) so that data migrations would be optimal when some
+ *! device fails. */
__u8 chooseleaf_stable;
+ /*! @cond INTERNAL */
/* This value is calculated after decode or construction by
the builder. It is exposed here (rather than having a
'build CRUSH working space' function) so that callers can
@@ -235,6 +371,7 @@ struct crush_map {
__u32 *choose_tries;
#endif
+ /*! @endcond */
};
@@ -247,6 +384,12 @@ extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy_rule(struct crush_rule *r);
+/** @ingroup API
+ *
+ * Deallocate the __map__, previously allocated with crush_create.
+ *
+ * @param map the crush map
+ */
extern void crush_destroy(struct crush_map *map);
static inline int crush_calc_tree_node(int i)
diff --git a/src/crush/mapper.h b/src/crush/mapper.h
index b9a614d1476..8de37e82928 100644
--- a/src/crush/mapper.h
+++ b/src/crush/mapper.h
@@ -11,6 +11,66 @@
#include "crush.h"
extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
+/** @ingroup API
+ *
+ * Map __x__ to __result_max__ items and store them in the __result__
+ * array. The mapping is done by following each step of the rule
+ * __ruleno__. See crush_make_rule(), crush_rule_set_step() and
+ * crush_add_rule() for more information on how the rules are created,
+ * populated and added to the crush __map__.
+ *
+ * The return value is the the number of items in the __result__
+ * array. If the caller asked for __result_max__ items and the return
+ * value is X where X < __result_max__, the content of __result[0,X[__
+ * is defined but the content of __result[X,result_max[__ is
+ * undefined. For example:
+ *
+ * crush_do_rule(map, ruleno=1, x=1, result, result_max=3,...) == 1
+ * result[0] is set
+ * result[1] is undefined
+ * result[2] is undefined
+ *
+ * An entry in the __result__ array is either an item in the crush
+ * __map__ or ::CRUSH_ITEM_NONE if no item was found. For example:
+ *
+ * crush_do_rule(map, ruleno=1, x=1, result, result_max=4,...) == 2
+ * result[0] is CRUSH_ITEM_NONE
+ * result[1] is item number 5
+ * result[2] is undefined
+ * result[3] is undefined
+ *
+ * The __weight__ array contains the probabilities that a leaf is
+ * ignored even if it is selected. It is a 16.16 fixed point
+ * number in the range [0x00000,0x10000]. The lower the value, the
+ * more often the leaf is ignored. For instance:
+ *
+ * - weight[leaf] == 0x00000 == 0.0 always ignore
+ * - weight[leaf] == 0x10000 == 1.0 never ignore
+ * - weight[leaf] == 0x08000 == 0.5 ignore 50% of the time
+ * - weight[leaf] == 0x04000 == 0.25 ignore 75% of the time
+ * - etc.
+ *
+ * During mapping, each leaf is checked against the __weight__ array,
+ * using the leaf as an index. If there is no entry in __weight__ for
+ * the leaf, it is ignored. If there is an entry, the leaf will be
+ * ignored some of the time, depending on the probability.
+ *
+ * The __cwin__ argument must be set as follows:
+ *
+ * char __cwin__[crush_work_size(__map__, __result_max__)];
+ * crush_init_workspace(__map__, __cwin__);
+ *
+ * @param map the crush_map
+ * @param ruleno a positive integer < __CRUSH_MAX_RULES__
+ * @param x the value to map to __result_max__ items
+ * @param result an array of items of size __result_max__
+ * @param result_max the size of the __result__ array
+ * @param weights an array of weights of size __weight_max__
+ * @param weight_max the size of the __weights__ array
+ * @param cwin must be the value of crush_work_size(__map__, __result_max__)
+ *
+ * @return 0 on error or the size of __result__ on success
+ */
extern int crush_do_rule(const struct crush_map *map,
int ruleno,
int x, int *result, int result_max,