summaryrefslogtreecommitdiffstats
path: root/src/kernel/mon_client.h
blob: 5b05b243523526213bb3c52b17265885e0722912 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#ifndef _FS_CEPH_MON_CLIENT_H
#define _FS_CEPH_MON_CLIENT_H

#include "messenger.h"
#include <linux/completion.h>
#include <linux/radix-tree.h>

/*
 * A small cluster of Ceph "monitors" are responsible for managing critical
 * cluster configuration and state information.  An odd number (e.g., 3, 5)
 * of cmon daemons use a modified version of the Paxos part-time parliament
 * algorithm to manage the MDS map (mds cluster membership), OSD map, and
 * list of clients who have mounted the file system.
 *
 * Communication with the monitor cluster is lossy, so requests for
 * information may have to be resent if we time out waiting for a response.
 * As long as we do not time out, we continue to send all requests to the
 * same monitor.  If there is a problem, we randomly pick a new monitor from
 * the cluster to try.
 */

struct ceph_client;
struct ceph_mount_args;

/*
 * The monitor map enumerates the set of all monitors.
 *
 * Make sure this structure size matches the encoded map size, or change
 * ceph_monmap_decode().
 */
struct ceph_monmap {
	ceph_fsid_t fsid;
	u32 epoch;
	u32 num_mon;
	struct ceph_entity_inst mon_inst[0];
};

struct ceph_mon_client;

/*
 * Generic mechanism for resending monitor requests.
 */
typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc,
					 int newmon);
struct ceph_mon_request_type {
	struct ceph_mon_client *monc;
	struct delayed_work delayed_work;
	unsigned long delay;
	ceph_monc_request_func_t do_request;
};

/* statfs() is done a bit differently */
struct ceph_mon_statfs_request {
	u64 tid;
	int result;
	struct ceph_statfs *buf;
	struct completion completion;
	unsigned long last_attempt, delay; /* jiffies */
};

struct ceph_mon_client {
	struct ceph_client *client;
	int last_mon;                       /* last monitor i contacted */
	struct ceph_monmap *monmap;

	/* pending statfs requests */
	struct mutex statfs_mutex;
	struct radix_tree_root statfs_request_tree;
	int num_statfs_requests;
	u64 last_tid;
	struct delayed_work statfs_delayed_work;

	/* mds/osd map or umount requests */
	struct mutex req_mutex;
	struct ceph_mon_request_type mdsreq, osdreq, umountreq;
	u32 want_mdsmap;
	u32 want_osdmap;
};

extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
extern int ceph_monmap_contains(struct ceph_monmap *m,
				struct ceph_entity_addr *addr);

extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
extern void ceph_monc_stop(struct ceph_mon_client *monc);

/*
 * The model here is to indicate that we need a new map of at least epoch
 * @want, and to indicate which maps receive.  Periodically rerequest the map
 * from the monitor cluster until we get what we want.
 */
extern void ceph_monc_request_mdsmap(struct ceph_mon_client *monc, u32 want);
extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);

extern void ceph_monc_request_osdmap(struct ceph_mon_client *monc, u32 want);
extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);

extern void ceph_monc_request_umount(struct ceph_mon_client *monc);

extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
			       struct ceph_statfs *buf);
extern void ceph_monc_handle_statfs_reply(struct ceph_mon_client *monc,
					  struct ceph_msg *msg);

extern void ceph_monc_request_umount(struct ceph_mon_client *monc);
extern void ceph_monc_handle_umount(struct ceph_mon_client *monc,
				    struct ceph_msg *msg);

#endif