aboutsummaryrefslogtreecommitdiff
path: root/drivers/misc/sgi-xp/xpc_uv.c
diff options
context:
space:
mode:
authorRobin Holt <holt@sgi.com>2009-04-13 14:40:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-13 15:04:33 -0700
commita374c57b0764432a80303abee3d1afd1939b5a0a (patch)
treeb3e15b590d5342e5de6c2b62da4dd61fb2ad0836 /drivers/misc/sgi-xp/xpc_uv.c
parenta06bba4643ae10ac6b202dade1cde38bc5e08b25 (diff)
sgi-xpc: prevent false heartbeat failures
The heartbeat timeout functionality in sgi-xpc is currently not trained to the connection time. If a connection is made and the code is in the last polling window prior to doing a timeout, the next polling window will see the heartbeat as unchanged and initiate a no-heartbeat disconnect. Signed-off-by: Robin Holt <holt@sgi.com> Signed-off-by: Dean Nelson <dcn@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/misc/sgi-xp/xpc_uv.c')
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c123
1 files changed, 43 insertions, 80 deletions
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index f7fff4727ed..97f7cb21a0a 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -46,8 +46,7 @@ struct uv_IO_APIC_route_entry {
};
#endif
-static atomic64_t xpc_heartbeat_uv;
-static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
+static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES)
#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
@@ -423,41 +422,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
/* syncing of remote_act_state was just done above */
break;
- case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
- struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
- msg = container_of(msg_hdr,
- struct xpc_activate_mq_msg_heartbeat_req_uv,
- hdr);
- part_uv->heartbeat = msg->heartbeat;
- break;
- }
- case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
- struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
- msg = container_of(msg_hdr,
- struct xpc_activate_mq_msg_heartbeat_req_uv,
- hdr);
- part_uv->heartbeat = msg->heartbeat;
-
- spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
- part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
- spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
- break;
- }
- case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
- struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
- msg = container_of(msg_hdr,
- struct xpc_activate_mq_msg_heartbeat_req_uv,
- hdr);
- part_uv->heartbeat = msg->heartbeat;
-
- spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
- part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
- spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
- break;
- }
case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
struct xpc_activate_mq_msg_activate_req_uv *msg;
@@ -475,6 +439,7 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+ part_uv->heartbeat_gpa = msg->heartbeat_gpa;
if (msg->activate_gru_mq_desc_gpa !=
part_uv->activate_gru_mq_desc_gpa) {
@@ -759,7 +724,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
/*
* !!! Make our side think that the remote partition sent an activate
- * !!! message our way by doing what the activate IRQ handler would
+ * !!! mq message our way by doing what the activate IRQ handler would
* !!! do had one really been sent.
*/
@@ -808,88 +773,80 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
static int
xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
{
- rp->sn.activate_gru_mq_desc_gpa =
+ xpc_heartbeat_uv =
+ &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
+ rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
+ rp->sn.uv.activate_gru_mq_desc_gpa =
uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
return 0;
}
static void
-xpc_send_heartbeat_uv(int msg_type)
+xpc_allow_hb_uv(short partid)
{
- short partid;
- struct xpc_partition *part;
- struct xpc_activate_mq_msg_heartbeat_req_uv msg;
-
- /*
- * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
- * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
- * !!! seconds. This is an increase in numalink traffic.
- * ??? Is this good?
- */
-
- msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
-
- partid = find_first_bit(xpc_heartbeating_to_mask_uv,
- XP_MAX_NPARTITIONS_UV);
-
- while (partid < XP_MAX_NPARTITIONS_UV) {
- part = &xpc_partitions[partid];
+}
- xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
- msg_type);
+static void
+xpc_disallow_hb_uv(short partid)
+{
+}
- partid = find_next_bit(xpc_heartbeating_to_mask_uv,
- XP_MAX_NPARTITIONS_UV, partid + 1);
- }
+static void
+xpc_disallow_all_hbs_uv(void)
+{
}
static void
xpc_increment_heartbeat_uv(void)
{
- xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV);
+ xpc_heartbeat_uv->value++;
}
static void
xpc_offline_heartbeat_uv(void)
{
- xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+ xpc_increment_heartbeat_uv();
+ xpc_heartbeat_uv->offline = 1;
}
static void
xpc_online_heartbeat_uv(void)
{
- xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV);
+ xpc_increment_heartbeat_uv();
+ xpc_heartbeat_uv->offline = 0;
}
static void
xpc_heartbeat_init_uv(void)
{
- atomic64_set(&xpc_heartbeat_uv, 0);
- bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
- xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
+ xpc_heartbeat_uv->value = 1;
+ xpc_heartbeat_uv->offline = 0;
}
static void
xpc_heartbeat_exit_uv(void)
{
- xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+ xpc_offline_heartbeat_uv();
}
static enum xp_retval
xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
{
struct xpc_partition_uv *part_uv = &part->sn.uv;
- enum xp_retval ret = xpNoHeartbeat;
+ enum xp_retval ret;
- if (part_uv->remote_act_state != XPC_P_AS_INACTIVE &&
- part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) {
+ ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
+ part_uv->heartbeat_gpa,
+ sizeof(struct xpc_heartbeat_uv));
+ if (ret != xpSuccess)
+ return ret;
- if (part_uv->heartbeat != part->last_heartbeat ||
- (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) {
+ if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
+ !part_uv->cached_heartbeat.offline) {
- part->last_heartbeat = part_uv->heartbeat;
- ret = xpSuccess;
- }
+ ret = xpNoHeartbeat;
+ } else {
+ part->last_heartbeat = part_uv->cached_heartbeat.value;
}
return ret;
}
@@ -904,8 +861,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+ part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
part->sn.uv.activate_gru_mq_desc_gpa =
- remote_rp->sn.activate_gru_mq_desc_gpa;
+ remote_rp->sn.uv.activate_gru_mq_desc_gpa;
/*
* ??? Is it a good idea to make this conditional on what is
@@ -913,8 +871,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
*/
if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+ msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
msg.activate_gru_mq_desc_gpa =
- xpc_rsvd_page->sn.activate_gru_mq_desc_gpa;
+ xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
}
@@ -1677,6 +1636,10 @@ xpc_init_uv(void)
xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
+
+ xpc_allow_hb = xpc_allow_hb_uv;
+ xpc_disallow_hb = xpc_disallow_hb_uv;
+ xpc_disallow_all_hbs = xpc_disallow_all_hbs_uv;
xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
xpc_online_heartbeat = xpc_online_heartbeat_uv;