* the sender.
*/
+/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */
+
+#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
+#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
+#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
+#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
+#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
+
/* static tag bytes (protocol control messages) */
static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
}
EXPORT_SYMBOL(ceph_msgr_flush);
+/* Connection socket state transition functions */
+
+static void con_sock_state_init(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+}
+
+static void con_sock_state_connecting(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+}
+
+static void con_sock_state_connected(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+}
+
+static void con_sock_state_closing(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+}
+
+static void con_sock_state_closed(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+}
/*
* socket callback functions
* buffer. See net/ipv4/tcp_input.c:tcp_check_space()
* and net/core/stream.c:sk_stream_write_space().
*/
- if (test_bit(WRITE_PENDING, &con->state)) {
+ if (test_bit(WRITE_PENDING, &con->flags)) {
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
dout("%s %p queueing write work\n", __func__, con);
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
dout("%s TCP_CLOSE\n", __func__);
case TCP_CLOSE_WAIT:
dout("%s TCP_CLOSE_WAIT\n", __func__);
- if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
+ con_sock_state_closing(con);
+ if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
if (test_bit(CONNECTING, &con->state))
con->error_msg = "connection failed";
else
break;
case TCP_ESTABLISHED:
dout("%s TCP_ESTABLISHED\n", __func__);
+ con_sock_state_connected(con);
queue_con(con);
break;
default: /* Everything else is uninteresting */
dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+ con_sock_state_connecting(con);
ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
O_NONBLOCK);
if (ret == -EINPROGRESS) {
return ret;
}
con->sock = sock;
-
return 0;
}
sock_release(con->sock);
con->sock = NULL;
clear_bit(SOCK_CLOSED, &con->state);
+ con_sock_state_closed(con);
return rc;
}
static void ceph_msg_remove(struct ceph_msg *msg)
{
list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+ ceph_con_put(msg->con);
+ msg->con = NULL;
+
ceph_msg_put(msg);
}
static void ceph_msg_remove_list(struct list_head *head)
ceph_msg_remove_list(&con->out_sent);
if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ ceph_con_put(con->in_msg->con);
}
con->connect_seq = 0;
{
dout("con_close %p peer %s\n", con,
ceph_pr_addr(&con->peer_addr.in_addr));
- set_bit(CLOSED, &con->state); /* in case there's queued work */
+ clear_bit(NEGOTIATING, &con->state);
clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
- clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
- clear_bit(KEEPALIVE_PENDING, &con->state);
- clear_bit(WRITE_PENDING, &con->state);
+ set_bit(CLOSED, &con->state);
+
+ clear_bit(LOSSYTX, &con->flags); /* so we retry next connect */
+ clear_bit(KEEPALIVE_PENDING, &con->flags);
+ clear_bit(WRITE_PENDING, &con->flags);
+
mutex_lock(&con->mutex);
reset_connection(con);
con->peer_global_seq = 0;
{
dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
set_bit(OPENING, &con->state);
- clear_bit(CLOSED, &con->state);
+ WARN_ON(!test_and_clear_bit(CLOSED, &con->state));
+
memcpy(&con->peer_addr, addr, sizeof(*addr));
con->delay = 0; /* reset backoff memory */
queue_con(con);
/*
* initialize a new connection.
*/
-void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
+void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr, __u8 entity_type, __u64 entity_num)
{
dout("con_init %p\n", con);
memset(con, 0, sizeof(*con));
+ con->private = private;
+ con->ops = ops;
atomic_set(&con->nref, 1);
con->msgr = msgr;
+
+ con_sock_state_init(con);
+
+ con->peer_name.type = (__u8) entity_type;
+ con->peer_name.num = cpu_to_le64(entity_num);
+
mutex_init(&con->mutex);
INIT_LIST_HEAD(&con->out_queue);
INIT_LIST_HEAD(&con->out_sent);
INIT_DELAYED_WORK(&con->work, con_work);
+
+ set_bit(CLOSED, &con->state);
}
EXPORT_SYMBOL(ceph_con_init);
&con->out_temp_ack);
}
+ BUG_ON(list_empty(&con->out_queue));
m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
con->out_msg = m;
+ BUG_ON(m->con != con);
/* put message on sent list */
ceph_msg_get(m);
m->hdr.seq = cpu_to_le64(++con->out_seq);
m->needs_out_seq = false;
}
+#ifdef CONFIG_BLOCK
+ else
+ m->bio_iter = NULL;
+#endif
dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
prepare_write_message_footer(con);
}
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(WRITE_PENDING, &con->flags);
}
/*
&con->out_temp_ack);
con->out_more = 1; /* more will follow.. eventually.. */
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(WRITE_PENDING, &con->flags);
}
/*
dout("prepare_write_keepalive %p\n", con);
con_out_kvec_reset(con);
con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(WRITE_PENDING, &con->flags);
}
/*
if (IS_ERR(auth))
return auth;
- if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state))
+ if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->flags))
return ERR_PTR(-EAGAIN);
con->auth_reply_buf = auth->authorizer_reply_buf;
&con->msgr->my_enc_addr);
con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(WRITE_PENDING, &con->flags);
}
static int prepare_write_connect(struct ceph_connection *con)
{
- unsigned global_seq = get_global_seq(con->msgr, 0);
+ unsigned int global_seq = get_global_seq(con->msgr, 0);
int proto;
int auth_proto;
struct ceph_auth_handshake *auth;
auth->authorizer_buf);
con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(WRITE_PENDING, &con->flags);
return 0;
}
static int write_partial_msg_pages(struct ceph_connection *con)
{
struct ceph_msg *msg = con->out_msg;
- unsigned data_len = le32_to_cpu(msg->hdr.data_len);
+ unsigned int data_len = le32_to_cpu(msg->hdr.data_len);
size_t len;
bool do_datacrc = !con->msgr->nocrc;
int ret;
le32_to_cpu(con->in_reply.connect_seq));
if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
- set_bit(LOSSYTX, &con->state);
+ set_bit(LOSSYTX, &con->flags);
prepare_read_tag(con);
break;
return 1;
}
-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip);
+static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
+ struct ceph_msg_header *hdr);
static int read_partial_message_pages(struct ceph_connection *con,
struct page **pages,
- unsigned data_len, bool do_datacrc)
+ unsigned int data_len, bool do_datacrc)
{
void *p;
int ret;
#ifdef CONFIG_BLOCK
static int read_partial_message_bio(struct ceph_connection *con,
struct bio **bio_iter, int *bio_seg,
- unsigned data_len, bool do_datacrc)
+ unsigned int data_len, bool do_datacrc)
{
struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
void *p;
int size;
int end;
int ret;
- unsigned front_len, middle_len, data_len;
+ unsigned int front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
- int skip;
u64 seq;
u32 crc;
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
- skip = 0;
- con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
- if (skip) {
+ if (ceph_con_in_msg_alloc(con, &con->in_hdr)) {
/* skip this message */
dout("alloc_msg said skip message\n");
BUG_ON(con->in_msg);
"error allocating memory for incoming message";
return -ENOMEM;
}
+
+ BUG_ON(con->in_msg->con != con);
m = con->in_msg;
m->front.iov_len = 0; /* haven't read it yet */
if (m->middle)
{
struct ceph_msg *msg;
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
msg = con->in_msg;
con->in_msg = NULL;
+ ceph_con_put(con);
/* if first message, set peer_name */
if (con->peer_name.type == 0)
/* open the socket first? */
if (con->sock == NULL) {
+ clear_bit(NEGOTIATING, &con->state);
+ set_bit(CONNECTING, &con->state);
+
con_out_kvec_reset(con);
prepare_write_banner(con);
ret = prepare_write_connect(con);
if (ret < 0)
goto out;
prepare_read_banner(con);
- set_bit(CONNECTING, &con->state);
- clear_bit(NEGOTIATING, &con->state);
BUG_ON(con->in_msg);
con->in_tag = CEPH_MSGR_TAG_READY;
prepare_write_ack(con);
goto more;
}
- if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
+ if (test_and_clear_bit(KEEPALIVE_PENDING, &con->flags)) {
prepare_write_keepalive(con);
goto more;
}
}
/* Nothing to do! */
- clear_bit(WRITE_PENDING, &con->state);
+ clear_bit(WRITE_PENDING, &con->flags);
dout("try_write nothing else to write.\n");
ret = 0;
out:
mutex_lock(&con->mutex);
restart:
- if (test_and_clear_bit(BACKOFF, &con->state)) {
+ if (test_and_clear_bit(BACKOFF, &con->flags)) {
dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
round_jiffies_relative(con->delay))) {
con_close_socket(con);
}
- if (test_and_clear_bit(SOCK_CLOSED, &con->state))
+ if (test_and_clear_bit(SOCK_CLOSED, &con->flags))
goto fault;
ret = try_read(con);
dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
- if (test_bit(LOSSYTX, &con->state)) {
+ if (test_bit(LOSSYTX, &con->flags)) {
dout("fault on LOSSYTX channel\n");
goto out;
}
con_close_socket(con);
if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ ceph_con_put(con);
}
/* Requeue anything that hasn't been acked */
/* If there are no messages queued or keepalive pending, place
* the connection in a STANDBY state */
if (list_empty(&con->out_queue) &&
- !test_bit(KEEPALIVE_PENDING, &con->state)) {
+ !test_bit(KEEPALIVE_PENDING, &con->flags)) {
dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
- clear_bit(WRITE_PENDING, &con->state);
+ clear_bit(WRITE_PENDING, &con->flags);
set_bit(STANDBY, &con->state);
} else {
/* retry after a delay. */
* that when con_work restarts we schedule the
* delay then.
*/
- set_bit(BACKOFF, &con->state);
+ set_bit(BACKOFF, &con->flags);
}
}
/*
- * create a new messenger instance
+ * initialize a new messenger instance
*/
-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
- u32 supported_features,
- u32 required_features)
+void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u32 supported_features,
+ u32 required_features,
+ bool nocrc)
{
- struct ceph_messenger *msgr;
-
- msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
- if (msgr == NULL)
- return ERR_PTR(-ENOMEM);
-
msgr->supported_features = supported_features;
msgr->required_features = required_features;
msgr->inst.addr.type = 0;
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
+ msgr->nocrc = nocrc;
- dout("messenger_create %p\n", msgr);
- return msgr;
+ dout("%s %p\n", __func__, msgr);
}
-EXPORT_SYMBOL(ceph_messenger_create);
-
-void ceph_messenger_destroy(struct ceph_messenger *msgr)
-{
- dout("destroy %p\n", msgr);
- kfree(msgr);
- dout("destroyed messenger %p\n", msgr);
-}
-EXPORT_SYMBOL(ceph_messenger_destroy);
+EXPORT_SYMBOL(ceph_messenger_init);
static void clear_standby(struct ceph_connection *con)
{
mutex_lock(&con->mutex);
dout("clear_standby %p and ++connect_seq\n", con);
con->connect_seq++;
- WARN_ON(test_bit(WRITE_PENDING, &con->state));
- WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
+ WARN_ON(test_bit(WRITE_PENDING, &con->flags));
+ WARN_ON(test_bit(KEEPALIVE_PENDING, &con->flags));
mutex_unlock(&con->mutex);
}
}
/* queue */
mutex_lock(&con->mutex);
+
+ BUG_ON(msg->con != NULL);
+ msg->con = ceph_con_get(con);
+ BUG_ON(msg->con == NULL);
+
BUG_ON(!list_empty(&msg->list_head));
list_add_tail(&msg->list_head, &con->out_queue);
dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
/* if there wasn't anything waiting to send before, queue
* new work */
clear_standby(con);
- if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ if (test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_send);
/*
* Revoke a message that was previously queued for send
*/
-void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke(struct ceph_msg *msg)
{
+ struct ceph_connection *con = msg->con;
+
+ if (!con)
+ return; /* Message not in our possession */
+
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p - was on queue\n", con, msg);
+ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
list_del_init(&msg->list_head);
- ceph_msg_put(msg);
+ BUG_ON(msg->con == NULL);
+ ceph_con_put(msg->con);
+ msg->con = NULL;
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("con_revoke %p msg %p - was sending\n", con, msg);
+ dout("%s %p msg %p - was sending\n", __func__, con, msg);
con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- ceph_msg_put(msg);
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
mutex_unlock(&con->mutex);
}
/*
* Revoke a message that we may be reading data into
*/
-void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke_incoming(struct ceph_msg *msg)
{
+ struct ceph_connection *con;
+
+ BUG_ON(msg == NULL);
+ if (!msg->con) {
+ dout("%s msg %p null con\n", __func__, msg);
+
+ return; /* Message not in our possession */
+ }
+
+ con = msg->con;
mutex_lock(&con->mutex);
- if (con->in_msg && con->in_msg == msg) {
- unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
- unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
- unsigned data_len = le32_to_cpu(con->in_hdr.data_len);
+ if (con->in_msg == msg) {
+ unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
+ unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len);
+ unsigned int data_len = le32_to_cpu(con->in_hdr.data_len);
/* skip rest of message */
- dout("con_revoke_pages %p msg %p revoked\n", con, msg);
- con->in_base_pos = con->in_base_pos -
+ dout("%s %p msg %p revoked\n", __func__, con, msg);
+ con->in_base_pos = con->in_base_pos -
sizeof(struct ceph_msg_header) -
front_len -
middle_len -
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
} else {
- dout("con_revoke_pages %p msg %p pages %p no-op\n",
- con, con->in_msg, msg);
+ dout("%s %p in_msg %p msg %p no-op\n",
+ __func__, con, con->in_msg, msg);
}
mutex_unlock(&con->mutex);
}
{
dout("con_keepalive %p\n", con);
clear_standby(con);
- if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
- test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ if (test_and_set_bit(KEEPALIVE_PENDING, &con->flags) == 0 &&
+ test_and_set_bit(WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_keepalive);
if (m == NULL)
goto out;
kref_init(&m->kref);
+
+ m->con = NULL;
INIT_LIST_HEAD(&m->list_head);
m->hdr.tid = 0;
}
/*
- * Generic message allocator, for incoming messages.
+ * Allocate a message for receiving an incoming message on a
+ * connection, and save the result in con->in_msg. Uses the
+ * connection's private alloc_msg op if available.
+ *
+ * Returns true if the message should be skipped, false otherwise.
+ * If true is returned (skip message), con->in_msg will be NULL.
+ * If false is returned, con->in_msg will contain a pointer to the
+ * newly-allocated message, or NULL in case of memory exhaustion.
*/
-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip)
+static bool ceph_con_in_msg_alloc(struct ceph_connection *con,
+ struct ceph_msg_header *hdr)
{
int type = le16_to_cpu(hdr->type);
int front_len = le32_to_cpu(hdr->front_len);
int middle_len = le32_to_cpu(hdr->middle_len);
- struct ceph_msg *msg = NULL;
int ret;
+ BUG_ON(con->in_msg != NULL);
+
if (con->ops->alloc_msg) {
+ int skip = 0;
+
mutex_unlock(&con->mutex);
- msg = con->ops->alloc_msg(con, hdr, skip);
+ con->in_msg = con->ops->alloc_msg(con, hdr, &skip);
mutex_lock(&con->mutex);
- if (!msg || *skip)
- return NULL;
+ if (con->in_msg) {
+ con->in_msg->con = ceph_con_get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ }
+ if (skip)
+ con->in_msg = NULL;
+
+ if (!con->in_msg)
+ return skip != 0;
}
- if (!msg) {
- *skip = 0;
- msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
- if (!msg) {
+ if (!con->in_msg) {
+ con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!con->in_msg) {
pr_err("unable to allocate msg type %d len %d\n",
type, front_len);
- return NULL;
+ return false;
}
- msg->page_alignment = le16_to_cpu(hdr->data_off);
+ con->in_msg->con = ceph_con_get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
}
- memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
- if (middle_len && !msg->middle) {
- ret = ceph_alloc_middle(con, msg);
+ if (middle_len && !con->in_msg->middle) {
+ ret = ceph_alloc_middle(con, con->in_msg);
if (ret < 0) {
- ceph_msg_put(msg);
- return NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
}
}
- return msg;
+ return false;
}