summary | refs | log | tree | commit | diff | stats
path: root/drivers/hv
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-10-04 04:57:49 +0200
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-10-04 04:57:49 +0200
commit 7a53eea1f7b527fd3b6d7ca992914840981afe99 (patch)
tree 35dfd7e14d5c44ae2d34e470aaaa68dbfec39324 /drivers/hv
parent Merge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/ke... (diff)
parent drivers/misc/hpilo: Changes to support new security states in iLO5 FW (diff)
download linux-7a53eea1f7b527fd3b6d7ca992914840981afe99.tar.xz
linux-7a53eea1f7b527fd3b6d7ca992914840981afe99.zip
Merge tag 'char-misc-4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc driver updates from Greg KH:
 "Here's the "big" char and misc driver update for 4.9-rc1.

  Lots of little things here, all over the driver tree for subsystems
  that flow through me. Nothing major that I can discern, full details
  are in the shortlog.

  All have been in the linux-next tree with no reported issues"

* tag 'char-misc-4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (144 commits)
  drivers/misc/hpilo: Changes to support new security states in iLO5 FW
  at25: fix debug and error messaging
  misc/genwqe: ensure zero initialization
  vme: fake: remove unexpected unlock in fake_master_set()
  vme: fake: mark symbols static where possible
  spmi: pmic-arb: Return an error code if sanity check fails
  Drivers: hv: get rid of id in struct vmbus_channel
  Drivers: hv: make VMBus bus ids persistent
  mcb: Add a dma_device to mcb_device
  mcb: Enable PCI bus mastering by default
  mei: stop the stall timer worker if not needed
  clk: probe common clock drivers earlier
  vme: fake: fix build for 64-bit dma_addr_t
  ttyprintk: Neaten and simplify printing
  mei: me: add kaby point device ids
  coresight: tmc: mark symbols static where possible
  coresight: perf: deal with error condition properly
  Drivers: hv: hv_util: Avoid dynamic allocation in time synch
  fpga manager: Add hardware dependency to Zynq driver
  Drivers: hv: utils: Support TimeSync version 4.0 protocol samples.
  ...
Diffstat (limited to 'drivers/hv')
-rw-r--r--  drivers/hv/channel.c             148
-rw-r--r--  drivers/hv/channel_mgmt.c        130
-rw-r--r--  drivers/hv/connection.c            8
-rw-r--r--  drivers/hv/hv.c                    8
-rw-r--r--  drivers/hv/hv_balloon.c          254
-rw-r--r--  drivers/hv/hv_fcopy.c             14
-rw-r--r--  drivers/hv/hv_kvp.c               27
-rw-r--r--  drivers/hv/hv_snapshot.c         109
-rw-r--r--  drivers/hv/hv_util.c             155
-rw-r--r--  drivers/hv/hv_utils_transport.c   15
-rw-r--r--  drivers/hv/hv_utils_transport.h    4
-rw-r--r--  drivers/hv/hyperv_vmbus.h          9
-rw-r--r--  drivers/hv/ring_buffer.c          76
-rw-r--r--  drivers/hv/vmbus_drv.c            16
14 files changed, 592 insertions, 381 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56dd261f7142..16f91c8490fe 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -43,7 +43,12 @@ static void vmbus_setevent(struct vmbus_channel *channel)
{
struct hv_monitor_page *monitorpage;
- if (channel->offermsg.monitor_allocated) {
+ /*
+ * For channels marked as in "low latency" mode
+ * bypass the monitor page mechanism.
+ */
+ if ((channel->offermsg.monitor_allocated) &&
+ (!channel->low_latency)) {
/* Each u32 represents 32 channels */
sync_set_bit(channel->offermsg.child_relid & 31,
(unsigned long *) vmbus_connection.send_int_page +
@@ -70,12 +75,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
{
struct vmbus_channel_open_channel *open_msg;
struct vmbus_channel_msginfo *open_info = NULL;
- void *in, *out;
unsigned long flags;
int ret, err = 0;
- unsigned long t;
struct page *page;
+ if (send_ringbuffer_size % PAGE_SIZE ||
+ recv_ringbuffer_size % PAGE_SIZE)
+ return -EINVAL;
+
spin_lock_irqsave(&newchannel->lock, flags);
if (newchannel->state == CHANNEL_OPEN_STATE) {
newchannel->state = CHANNEL_OPENING_STATE;
@@ -95,36 +102,33 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
recv_ringbuffer_size));
if (!page)
- out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(send_ringbuffer_size +
- recv_ringbuffer_size));
- else
- out = (void *)page_address(page);
+ page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
+ get_order(send_ringbuffer_size +
+ recv_ringbuffer_size));
- if (!out) {
+ if (!page) {
err = -ENOMEM;
- goto error0;
+ goto error_set_chnstate;
}
- in = (void *)((unsigned long)out + send_ringbuffer_size);
-
- newchannel->ringbuffer_pages = out;
+ newchannel->ringbuffer_pages = page_address(page);
newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
recv_ringbuffer_size) >> PAGE_SHIFT;
- ret = hv_ringbuffer_init(
- &newchannel->outbound, out, send_ringbuffer_size);
+ ret = hv_ringbuffer_init(&newchannel->outbound, page,
+ send_ringbuffer_size >> PAGE_SHIFT);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
- ret = hv_ringbuffer_init(
- &newchannel->inbound, in, recv_ringbuffer_size);
+ ret = hv_ringbuffer_init(&newchannel->inbound,
+ &page[send_ringbuffer_size >> PAGE_SHIFT],
+ recv_ringbuffer_size >> PAGE_SHIFT);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
@@ -132,14 +136,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
newchannel->ringbuffer_gpadlhandle = 0;
ret = vmbus_establish_gpadl(newchannel,
- newchannel->outbound.ring_buffer,
- send_ringbuffer_size +
- recv_ringbuffer_size,
- &newchannel->ringbuffer_gpadlhandle);
+ page_address(page),
+ send_ringbuffer_size +
+ recv_ringbuffer_size,
+ &newchannel->ringbuffer_gpadlhandle);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
/* Create and init the channel open message */
@@ -148,7 +152,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
GFP_KERNEL);
if (!open_info) {
err = -ENOMEM;
- goto error_gpadl;
+ goto error_free_gpadl;
}
init_completion(&open_info->waitevent);
@@ -164,7 +168,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
if (userdatalen > MAX_USER_DEFINED_BYTES) {
err = -EINVAL;
- goto error_gpadl;
+ goto error_free_gpadl;
}
if (userdatalen)
@@ -180,14 +184,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
if (ret != 0) {
err = ret;
- goto error1;
+ goto error_clean_msglist;
}
- t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
- if (t == 0) {
- err = -ETIMEDOUT;
- goto error1;
- }
+ wait_for_completion(&open_info->waitevent);
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&open_info->msglistentry);
@@ -195,25 +195,27 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
if (open_info->response.open_result.status) {
err = -EAGAIN;
- goto error_gpadl;
+ goto error_free_gpadl;
}
newchannel->state = CHANNEL_OPENED_STATE;
kfree(open_info);
return 0;
-error1:
+error_clean_msglist:
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&open_info->msglistentry);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-error_gpadl:
+error_free_gpadl:
vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-
-error0:
- free_pages((unsigned long)out,
- get_order(send_ringbuffer_size + recv_ringbuffer_size));
kfree(open_info);
+error_free_pages:
+ hv_ringbuffer_cleanup(&newchannel->outbound);
+ hv_ringbuffer_cleanup(&newchannel->inbound);
+ __free_pages(page,
+ get_order(send_ringbuffer_size + recv_ringbuffer_size));
+error_set_chnstate:
newchannel->state = CHANNEL_OPEN_STATE;
return err;
}
@@ -238,8 +240,7 @@ EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
* create_gpadl_header - Creates a gpadl for the specified buffer
*/
static int create_gpadl_header(void *kbuffer, u32 size,
- struct vmbus_channel_msginfo **msginfo,
- u32 *messagecount)
+ struct vmbus_channel_msginfo **msginfo)
{
int i;
int pagecount;
@@ -283,7 +284,6 @@ static int create_gpadl_header(void *kbuffer, u32 size,
gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
*msginfo = msgheader;
- *messagecount = 1;
pfnsum = pfncount;
pfnleft = pagecount - pfncount;
@@ -323,7 +323,6 @@ static int create_gpadl_header(void *kbuffer, u32 size,
}
msgbody->msgsize = msgsize;
- (*messagecount)++;
gpadl_body =
(struct vmbus_channel_gpadl_body *)msgbody->msg;
@@ -352,6 +351,8 @@ static int create_gpadl_header(void *kbuffer, u32 size,
msgheader = kzalloc(msgsize, GFP_KERNEL);
if (msgheader == NULL)
goto nomem;
+
+ INIT_LIST_HEAD(&msgheader->submsglist);
msgheader->msgsize = msgsize;
gpadl_header = (struct vmbus_channel_gpadl_header *)
@@ -366,7 +367,6 @@ static int create_gpadl_header(void *kbuffer, u32 size,
kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
*msginfo = msgheader;
- *messagecount = 1;
}
return 0;
@@ -390,8 +390,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
struct vmbus_channel_gpadl_header *gpadlmsg;
struct vmbus_channel_gpadl_body *gpadl_body;
struct vmbus_channel_msginfo *msginfo = NULL;
- struct vmbus_channel_msginfo *submsginfo;
- u32 msgcount;
+ struct vmbus_channel_msginfo *submsginfo, *tmp;
struct list_head *curr;
u32 next_gpadl_handle;
unsigned long flags;
@@ -400,7 +399,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
next_gpadl_handle =
(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
- ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
+ ret = create_gpadl_header(kbuffer, size, &msginfo);
if (ret)
return ret;
@@ -423,24 +422,21 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
if (ret != 0)
goto cleanup;
- if (msgcount > 1) {
- list_for_each(curr, &msginfo->submsglist) {
-
- submsginfo = (struct vmbus_channel_msginfo *)curr;
- gpadl_body =
- (struct vmbus_channel_gpadl_body *)submsginfo->msg;
+ list_for_each(curr, &msginfo->submsglist) {
+ submsginfo = (struct vmbus_channel_msginfo *)curr;
+ gpadl_body =
+ (struct vmbus_channel_gpadl_body *)submsginfo->msg;
- gpadl_body->header.msgtype =
- CHANNELMSG_GPADL_BODY;
- gpadl_body->gpadl = next_gpadl_handle;
+ gpadl_body->header.msgtype =
+ CHANNELMSG_GPADL_BODY;
+ gpadl_body->gpadl = next_gpadl_handle;
- ret = vmbus_post_msg(gpadl_body,
- submsginfo->msgsize -
- sizeof(*submsginfo));
- if (ret != 0)
- goto cleanup;
+ ret = vmbus_post_msg(gpadl_body,
+ submsginfo->msgsize -
+ sizeof(*submsginfo));
+ if (ret != 0)
+ goto cleanup;
- }
}
wait_for_completion(&msginfo->waitevent);
@@ -451,6 +447,10 @@ cleanup:
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&msginfo->msglistentry);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+ list_for_each_entry_safe(submsginfo, tmp, &msginfo->submsglist,
+ msglistentry) {
+ kfree(submsginfo);
+ }
kfree(msginfo);
return ret;
@@ -512,7 +512,6 @@ static void reset_channel_cb(void *arg)
static int vmbus_close_internal(struct vmbus_channel *channel)
{
struct vmbus_channel_close_channel *msg;
- struct tasklet_struct *tasklet;
int ret;
/*
@@ -524,8 +523,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
* To resolve the race, we can serialize them by disabling the
* tasklet when the latter is running here.
*/
- tasklet = hv_context.event_dpc[channel->target_cpu];
- tasklet_disable(tasklet);
+ hv_event_tasklet_disable(channel);
/*
* In case a device driver's probe() fails (e.g.,
@@ -591,7 +589,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
out:
- tasklet_enable(tasklet);
+ hv_event_tasklet_enable(channel);
return ret;
}
@@ -659,7 +657,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
/*
* Signalling the host is conditional on many factors:
@@ -680,11 +678,6 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
* mechanism which can hurt the performance otherwise.
*/
- if (channel->signal_policy)
- signal = true;
- else
- kick_q = true;
-
if (((ret == 0) && kick_q && signal) ||
(ret && !is_hvsock_channel(channel)))
vmbus_setevent(channel);
@@ -777,7 +770,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
/*
* Signalling the host is conditional on many factors:
@@ -795,11 +788,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
* enough condition that it should not matter.
*/
- if (channel->signal_policy)
- signal = true;
- else
- kick_q = true;
-
if (((ret == 0) && kick_q && signal) || (ret))
vmbus_setevent(channel);
@@ -861,7 +849,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
if (ret == 0 && signal)
vmbus_setevent(channel);
@@ -926,7 +914,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
if (ret == 0 && signal)
vmbus_setevent(channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index b6c1211b4df7..96a85cd39580 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
+#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
@@ -138,10 +139,32 @@ static const struct vmbus_device vmbus_devs[] = {
},
};
-static u16 hv_get_dev_type(const uuid_le *guid)
+static const struct {
+ uuid_le guid;
+} vmbus_unsupported_devs[] = {
+ { HV_AVMA1_GUID },
+ { HV_AVMA2_GUID },
+ { HV_RDV_GUID },
+};
+
+static bool is_unsupported_vmbus_devs(const uuid_le *guid)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
+ if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
+ return true;
+ return false;
+}
+
+static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
+ const uuid_le *guid = &channel->offermsg.offer.if_type;
u16 i;
+ if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
+ return HV_UNKOWN;
+
for (i = HV_IDE; i < HV_UNKOWN; i++) {
if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
return i;
@@ -251,14 +274,12 @@ EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
*/
static struct vmbus_channel *alloc_channel(void)
{
- static atomic_t chan_num = ATOMIC_INIT(0);
struct vmbus_channel *channel;
channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
if (!channel)
return NULL;
- channel->id = atomic_inc_return(&chan_num);
channel->acquire_ring_lock = true;
spin_lock_init(&channel->inbound_lock);
spin_lock_init(&channel->lock);
@@ -303,16 +324,32 @@ static void vmbus_release_relid(u32 relid)
vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}
+void hv_event_tasklet_disable(struct vmbus_channel *channel)
+{
+ struct tasklet_struct *tasklet;
+ tasklet = hv_context.event_dpc[channel->target_cpu];
+ tasklet_disable(tasklet);
+}
+
+void hv_event_tasklet_enable(struct vmbus_channel *channel)
+{
+ struct tasklet_struct *tasklet;
+ tasklet = hv_context.event_dpc[channel->target_cpu];
+ tasklet_enable(tasklet);
+
+ /* In case there is any pending event */
+ tasklet_schedule(tasklet);
+}
+
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
unsigned long flags;
struct vmbus_channel *primary_channel;
- vmbus_release_relid(relid);
-
BUG_ON(!channel->rescind);
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+ hv_event_tasklet_disable(channel);
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -321,6 +358,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
percpu_channel_deq(channel);
put_cpu();
}
+ hv_event_tasklet_enable(channel);
if (channel->primary_channel == NULL) {
list_del(&channel->listentry);
@@ -338,8 +376,11 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
* We need to free the bit for init_vp_index() to work in the case
* of sub-channel, when we reload drivers like hv_netvsc.
*/
- cpumask_clear_cpu(channel->target_cpu,
- &primary_channel->alloced_cpus_in_node);
+ if (channel->affinity_policy == HV_LOCALIZED)
+ cpumask_clear_cpu(channel->target_cpu,
+ &primary_channel->alloced_cpus_in_node);
+
+ vmbus_release_relid(relid);
free_channel(channel);
}
@@ -405,10 +446,13 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
goto err_free_chan;
}
- dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);
+ dev_type = hv_get_dev_type(newchannel);
+ if (dev_type == HV_NIC)
+ set_channel_signal_state(newchannel, HV_SIGNAL_POLICY_EXPLICIT);
init_vp_index(newchannel, dev_type);
+ hv_event_tasklet_disable(newchannel);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -418,6 +462,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
percpu_channel_enq(newchannel);
put_cpu();
}
+ hv_event_tasklet_enable(newchannel);
/*
* This state is used to indicate a successful open
@@ -463,12 +508,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
return;
err_deq_chan:
- vmbus_release_relid(newchannel->offermsg.child_relid);
-
mutex_lock(&vmbus_connection.channel_mutex);
list_del(&newchannel->listentry);
mutex_unlock(&vmbus_connection.channel_mutex);
+ hv_event_tasklet_disable(newchannel);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -477,6 +521,9 @@ err_deq_chan:
percpu_channel_deq(newchannel);
put_cpu();
}
+ hv_event_tasklet_enable(newchannel);
+
+ vmbus_release_relid(newchannel->offermsg.child_relid);
err_free_chan:
free_channel(newchannel);
@@ -522,17 +569,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
}
/*
- * We distribute primary channels evenly across all the available
- * NUMA nodes and within the assigned NUMA node we will assign the
- * first available CPU to the primary channel.
- * The sub-channels will be assigned to the CPUs available in the
- * NUMA node evenly.
+ * Based on the channel affinity policy, we will assign the NUMA
+ * nodes.
*/
- if (!primary) {
+
+ if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
while (true) {
next_node = next_numa_node_id++;
- if (next_node == nr_node_ids)
+ if (next_node == nr_node_ids) {
next_node = next_numa_node_id = 0;
+ continue;
+ }
if (cpumask_empty(cpumask_of_node(next_node)))
continue;
break;
@@ -556,15 +603,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
cur_cpu = -1;
- /*
- * Normally Hyper-V host doesn't create more subchannels than there
- * are VCPUs on the node but it is possible when not all present VCPUs
- * on the node are initialized by guest. Clear the alloced_cpus_in_node
- * to start over.
- */
- if (cpumask_equal(&primary->alloced_cpus_in_node,
- cpumask_of_node(primary->numa_node)))
- cpumask_clear(&primary->alloced_cpus_in_node);
+ if (primary->affinity_policy == HV_LOCALIZED) {
+ /*
+ * Normally Hyper-V host doesn't create more subchannels
+ * than there are VCPUs on the node but it is possible when not
+ * all present VCPUs on the node are initialized by guest.
+ * Clear the alloced_cpus_in_node to start over.
+ */
+ if (cpumask_equal(&primary->alloced_cpus_in_node,
+ cpumask_of_node(primary->numa_node)))
+ cpumask_clear(&primary->alloced_cpus_in_node);
+ }
while (true) {
cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -575,17 +624,24 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
continue;
}
- /*
- * NOTE: in the case of sub-channel, we clear the sub-channel
- * related bit(s) in primary->alloced_cpus_in_node in
- * hv_process_channel_removal(), so when we reload drivers
- * like hv_netvsc in SMP guest, here we're able to re-allocate
- * bit from primary->alloced_cpus_in_node.
- */
- if (!cpumask_test_cpu(cur_cpu,
- &primary->alloced_cpus_in_node)) {
- cpumask_set_cpu(cur_cpu,
- &primary->alloced_cpus_in_node);
+ if (primary->affinity_policy == HV_LOCALIZED) {
+ /*
+ * NOTE: in the case of sub-channel, we clear the
+ * sub-channel related bit(s) in
+ * primary->alloced_cpus_in_node in
+ * hv_process_channel_removal(), so when we
+ * reload drivers like hv_netvsc in SMP guest, here
+ * we're able to re-allocate
+ * bit from primary->alloced_cpus_in_node.
+ */
+ if (!cpumask_test_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node)) {
+ cpumask_set_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node);
+ cpumask_set_cpu(cur_cpu, alloced_mask);
+ break;
+ }
+ } else {
cpumask_set_cpu(cur_cpu, alloced_mask);
break;
}
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index fcf8a02dc0ea..78e6368a4423 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -439,7 +439,7 @@ int vmbus_post_msg(void *buffer, size_t buflen)
union hv_connection_id conn_id;
int ret = 0;
int retries = 0;
- u32 msec = 1;
+ u32 usec = 1;
conn_id.asu32 = 0;
conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;
@@ -472,9 +472,9 @@ int vmbus_post_msg(void *buffer, size_t buflen)
}
retries++;
- msleep(msec);
- if (msec < 2048)
- msec *= 2;
+ udelay(usec);
+ if (usec < 2048)
+ usec *= 2;
}
return ret;
}
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a1c086ba3b9a..60dbd6cb4640 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -278,7 +278,7 @@ cleanup:
*
* This routine is called normally during driver unloading or exiting.
*/
-void hv_cleanup(void)
+void hv_cleanup(bool crash)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
@@ -288,7 +288,8 @@ void hv_cleanup(void)
if (hv_context.hypercall_page) {
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
- vfree(hv_context.hypercall_page);
+ if (!crash)
+ vfree(hv_context.hypercall_page);
hv_context.hypercall_page = NULL;
}
@@ -308,7 +309,8 @@ void hv_cleanup(void)
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
- vfree(hv_context.tsc_page);
+ if (!crash)
+ vfree(hv_context.tsc_page);
hv_context.tsc_page = NULL;
}
#endif
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index df35fb7ed5df..fdf8da929cbe 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -430,16 +430,27 @@ struct dm_info_msg {
* currently hot added. We hot add in multiples of 128M
* chunks; it is possible that we may not be able to bring
* online all the pages in the region. The range
- * covered_end_pfn defines the pages that can
+ * covered_start_pfn:covered_end_pfn defines the pages that can
* be brough online.
*/
struct hv_hotadd_state {
struct list_head list;
unsigned long start_pfn;
+ unsigned long covered_start_pfn;
unsigned long covered_end_pfn;
unsigned long ha_end_pfn;
unsigned long end_pfn;
+ /*
+ * A list of gaps.
+ */
+ struct list_head gap_list;
+};
+
+struct hv_hotadd_gap {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
};
struct balloon_state {
@@ -536,7 +547,11 @@ struct hv_dynmem_device {
*/
struct task_struct *thread;
- struct mutex ha_region_mutex;
+ /*
+ * Protects ha_region_list, num_pages_onlined counter and individual
+ * regions from ha_region_list.
+ */
+ spinlock_t ha_lock;
/*
* A list of hot-add regions.
@@ -560,18 +575,14 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
void *v)
{
struct memory_notify *mem = (struct memory_notify *)v;
+ unsigned long flags;
switch (val) {
- case MEM_GOING_ONLINE:
- mutex_lock(&dm_device.ha_region_mutex);
- break;
-
case MEM_ONLINE:
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
dm_device.num_pages_onlined += mem->nr_pages;
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
case MEM_CANCEL_ONLINE:
- if (val == MEM_ONLINE ||
- mutex_is_locked(&dm_device.ha_region_mutex))
- mutex_unlock(&dm_device.ha_region_mutex);
if (dm_device.ha_waiting) {
dm_device.ha_waiting = false;
complete(&dm_device.ol_waitevent);
@@ -579,10 +590,11 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
break;
case MEM_OFFLINE:
- mutex_lock(&dm_device.ha_region_mutex);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
dm_device.num_pages_onlined -= mem->nr_pages;
- mutex_unlock(&dm_device.ha_region_mutex);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
+ case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
case MEM_CANCEL_OFFLINE:
break;
@@ -595,18 +607,46 @@ static struct notifier_block hv_memory_nb = {
.priority = 0
};
+/* Check if the particular page is backed and can be onlined and online it. */
+static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
+{
+ unsigned long cur_start_pgp;
+ unsigned long cur_end_pgp;
+ struct hv_hotadd_gap *gap;
+
+ cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+ /* The page is not backed. */
+ if (((unsigned long)pg < cur_start_pgp) ||
+ ((unsigned long)pg >= cur_end_pgp))
+ return;
+
+ /* Check for gaps. */
+ list_for_each_entry(gap, &has->gap_list, list) {
+ cur_start_pgp = (unsigned long)
+ pfn_to_page(gap->start_pfn);
+ cur_end_pgp = (unsigned long)
+ pfn_to_page(gap->end_pfn);
+ if (((unsigned long)pg >= cur_start_pgp) &&
+ ((unsigned long)pg < cur_end_pgp)) {
+ return;
+ }
+ }
+
+ /* This frame is currently backed; online the page. */
+ __online_page_set_limits(pg);
+ __online_page_increment_counters(pg);
+ __online_page_free(pg);
+}
-static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
+static void hv_bring_pgs_online(struct hv_hotadd_state *has,
+ unsigned long start_pfn, unsigned long size)
{
int i;
- for (i = 0; i < size; i++) {
- struct page *pg;
- pg = pfn_to_page(start_pfn + i);
- __online_page_set_limits(pg);
- __online_page_increment_counters(pg);
- __online_page_free(pg);
- }
+ for (i = 0; i < size; i++)
+ hv_page_online_one(has, pfn_to_page(start_pfn + i));
}
static void hv_mem_hot_add(unsigned long start, unsigned long size,
@@ -618,9 +658,12 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
unsigned long start_pfn;
unsigned long processed_pfn;
unsigned long total_pfn = pfn_count;
+ unsigned long flags;
for (i = 0; i < (size/HA_CHUNK); i++) {
start_pfn = start + (i * HA_CHUNK);
+
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn += HA_CHUNK;
if (total_pfn > HA_CHUNK) {
@@ -632,11 +675,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
}
has->covered_end_pfn += processed_pfn;
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
init_completion(&dm_device.ol_waitevent);
- dm_device.ha_waiting = true;
+ dm_device.ha_waiting = !memhp_auto_online;
- mutex_unlock(&dm_device.ha_region_mutex);
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
(HA_CHUNK << PAGE_SHIFT));
@@ -653,20 +696,23 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
*/
do_hot_add = false;
}
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn -= HA_CHUNK;
has->covered_end_pfn -= processed_pfn;
- mutex_lock(&dm_device.ha_region_mutex);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
}
/*
- * Wait for the memory block to be onlined.
- * Since the hot add has succeeded, it is ok to
- * proceed even if the pages in the hot added region
- * have not been "onlined" within the allowed time.
+ * Wait for the memory block to be onlined when memory onlining
+ * is done outside of kernel (memhp_auto_online). Since the hot
+ * add has succeeded, it is ok to proceed even if the pages in
+ * the hot added region have not been "onlined" within the
+ * allowed time.
*/
- wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
- mutex_lock(&dm_device.ha_region_mutex);
+ if (dm_device.ha_waiting)
+ wait_for_completion_timeout(&dm_device.ol_waitevent,
+ 5*HZ);
post_status(&dm_device);
}
@@ -675,47 +721,64 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
static void hv_online_page(struct page *pg)
{
- struct list_head *cur;
struct hv_hotadd_state *has;
unsigned long cur_start_pgp;
unsigned long cur_end_pgp;
+ unsigned long flags;
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
- cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
- cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
+ cur_start_pgp = (unsigned long)
+ pfn_to_page(has->start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
- if (((unsigned long)pg >= cur_start_pgp) &&
- ((unsigned long)pg < cur_end_pgp)) {
- /*
- * This frame is currently backed; online the
- * page.
- */
- __online_page_set_limits(pg);
- __online_page_increment_counters(pg);
- __online_page_free(pg);
- }
+ /* The page belongs to a different HAS. */
+ if (((unsigned long)pg < cur_start_pgp) ||
+ ((unsigned long)pg >= cur_end_pgp))
+ continue;
+
+ hv_page_online_one(has, pg);
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
-static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
{
- struct list_head *cur;
struct hv_hotadd_state *has;
+ struct hv_hotadd_gap *gap;
unsigned long residual, new_inc;
+ int ret = 0;
+ unsigned long flags;
- if (list_empty(&dm_device.ha_region_list))
- return false;
-
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
-
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
/*
* If the pfn range we are dealing with is not in the current
* "hot add block", move on.
*/
if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
continue;
+
+ /*
+ * If the current start pfn is not where the covered_end
+ * is, create a gap and update covered_end_pfn.
+ */
+ if (has->covered_end_pfn != start_pfn) {
+ gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
+ if (!gap) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ INIT_LIST_HEAD(&gap->list);
+ gap->start_pfn = has->covered_end_pfn;
+ gap->end_pfn = start_pfn;
+ list_add_tail(&gap->list, &has->gap_list);
+
+ has->covered_end_pfn = start_pfn;
+ }
+
/*
* If the current hot add-request extends beyond
* our current limit; extend it.
@@ -732,19 +795,12 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
has->end_pfn += new_inc;
}
- /*
- * If the current start pfn is not where the covered_end
- * is, update it.
- */
-
- if (has->covered_end_pfn != start_pfn)
- has->covered_end_pfn = start_pfn;
-
- return true;
-
+ ret = 1;
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- return false;
+ return ret;
}
static unsigned long handle_pg_range(unsigned long pg_start,
@@ -753,17 +809,13 @@ static unsigned long handle_pg_range(unsigned long pg_start,
unsigned long start_pfn = pg_start;
unsigned long pfn_cnt = pg_count;
unsigned long size;
- struct list_head *cur;
struct hv_hotadd_state *has;
unsigned long pgs_ol = 0;
unsigned long old_covered_state;
+ unsigned long res = 0, flags;
- if (list_empty(&dm_device.ha_region_list))
- return 0;
-
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
-
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
/*
* If the pfn range we are dealing with is not in the current
* "hot add block", move on.
@@ -783,6 +835,8 @@ static unsigned long handle_pg_range(unsigned long pg_start,
if (pgs_ol > pfn_cnt)
pgs_ol = pfn_cnt;
+ has->covered_end_pfn += pgs_ol;
+ pfn_cnt -= pgs_ol;
/*
* Check if the corresponding memory block is already
* online by checking its last previously backed page.
@@ -791,10 +845,8 @@ static unsigned long handle_pg_range(unsigned long pg_start,
*/
if (start_pfn > has->start_pfn &&
!PageReserved(pfn_to_page(start_pfn - 1)))
- hv_bring_pgs_online(start_pfn, pgs_ol);
+ hv_bring_pgs_online(has, start_pfn, pgs_ol);
- has->covered_end_pfn += pgs_ol;
- pfn_cnt -= pgs_ol;
}
if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
@@ -813,17 +865,20 @@ static unsigned long handle_pg_range(unsigned long pg_start,
} else {
pfn_cnt = size;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
}
/*
* If we managed to online any pages that were given to us,
* we declare success.
*/
- return has->covered_end_pfn - old_covered_state;
-
+ res = has->covered_end_pfn - old_covered_state;
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- return 0;
+ return res;
}
static unsigned long process_hot_add(unsigned long pg_start,
@@ -832,13 +887,20 @@ static unsigned long process_hot_add(unsigned long pg_start,
unsigned long rg_size)
{
struct hv_hotadd_state *ha_region = NULL;
+ int covered;
+ unsigned long flags;
if (pfn_cnt == 0)
return 0;
- if (!dm_device.host_specified_ha_region)
- if (pfn_covered(pg_start, pfn_cnt))
+ if (!dm_device.host_specified_ha_region) {
+ covered = pfn_covered(pg_start, pfn_cnt);
+ if (covered < 0)
+ return 0;
+
+ if (covered)
goto do_pg_range;
+ }
/*
* If the host has specified a hot-add range; deal with it first.
@@ -850,12 +912,17 @@ static unsigned long process_hot_add(unsigned long pg_start,
return 0;
INIT_LIST_HEAD(&ha_region->list);
+ INIT_LIST_HEAD(&ha_region->gap_list);
- list_add_tail(&ha_region->list, &dm_device.ha_region_list);
ha_region->start_pfn = rg_start;
ha_region->ha_end_pfn = rg_start;
+ ha_region->covered_start_pfn = pg_start;
ha_region->covered_end_pfn = pg_start;
ha_region->end_pfn = rg_start + rg_size;
+
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
do_pg_range:
@@ -882,7 +949,6 @@ static void hot_add_req(struct work_struct *dummy)
resp.hdr.size = sizeof(struct dm_hot_add_response);
#ifdef CONFIG_MEMORY_HOTPLUG
- mutex_lock(&dm_device.ha_region_mutex);
pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
@@ -916,7 +982,6 @@ static void hot_add_req(struct work_struct *dummy)
rg_start, rg_sz);
dm->num_pages_added += resp.page_count;
- mutex_unlock(&dm_device.ha_region_mutex);
#endif
/*
* The result field of the response structure has the
@@ -1010,7 +1075,6 @@ static unsigned long compute_balloon_floor(void)
static void post_status(struct hv_dynmem_device *dm)
{
struct dm_status status;
- struct sysinfo val;
unsigned long now = jiffies;
unsigned long last_post = last_post_time;
@@ -1022,7 +1086,6 @@ static void post_status(struct hv_dynmem_device *dm)
if (!time_after(now, (last_post_time + HZ)))
return;
- si_meminfo(&val);
memset(&status, 0, sizeof(struct dm_status));
status.hdr.type = DM_STATUS_REPORT;
status.hdr.size = sizeof(struct dm_status);
@@ -1038,7 +1101,7 @@ static void post_status(struct hv_dynmem_device *dm)
* num_pages_onlined) as committed to the host, otherwise it can try
* asking us to balloon them out.
*/
- status.num_avail = val.freeram;
+ status.num_avail = si_mem_available();
status.num_committed = vm_memory_committed() +
dm->num_pages_ballooned +
(dm->num_pages_added > dm->num_pages_onlined ?
@@ -1144,7 +1207,7 @@ static void balloon_up(struct work_struct *dummy)
int ret;
bool done = false;
int i;
- struct sysinfo val;
+ long avail_pages;
unsigned long floor;
/* The host balloons pages in 2M granularity. */
@@ -1156,12 +1219,12 @@ static void balloon_up(struct work_struct *dummy)
*/
alloc_unit = 512;
- si_meminfo(&val);
+ avail_pages = si_mem_available();
floor = compute_balloon_floor();
/* Refuse to balloon below the floor, keep the 2M granularity. */
- if (val.freeram < num_pages || val.freeram - num_pages < floor) {
- num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+ if (avail_pages < num_pages || avail_pages - num_pages < floor) {
+ num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
num_pages -= num_pages % PAGES_IN_2M;
}
@@ -1172,7 +1235,6 @@ static void balloon_up(struct work_struct *dummy)
bl_resp->hdr.size = sizeof(struct dm_balloon_response);
bl_resp->more_pages = 1;
-
num_pages -= num_ballooned;
num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
bl_resp, alloc_unit);
@@ -1461,7 +1523,7 @@ static int balloon_probe(struct hv_device *dev,
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
INIT_LIST_HEAD(&dm_device.ha_region_list);
- mutex_init(&dm_device.ha_region_mutex);
+ spin_lock_init(&dm_device.ha_lock);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
dm_device.host_specified_ha_region = false;
@@ -1580,8 +1642,9 @@ probe_error0:
static int balloon_remove(struct hv_device *dev)
{
struct hv_dynmem_device *dm = hv_get_drvdata(dev);
- struct list_head *cur, *tmp;
- struct hv_hotadd_state *has;
+ struct hv_hotadd_state *has, *tmp;
+ struct hv_hotadd_gap *gap, *tmp_gap;
+ unsigned long flags;
if (dm->num_pages_ballooned != 0)
pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
@@ -1596,11 +1659,16 @@ static int balloon_remove(struct hv_device *dev)
restore_online_page_callback(&hv_online_page);
unregister_memory_notifier(&hv_memory_nb);
#endif
- list_for_each_safe(cur, tmp, &dm->ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
+ list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
+ list_del(&gap->list);
+ kfree(gap);
+ }
list_del(&has->list);
kfree(has);
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
return 0;
}
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23c70799ad8a..8b2ba98831ec 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -83,6 +83,12 @@ static void fcopy_timeout_func(struct work_struct *dummy)
hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
}
+static void fcopy_register_done(void)
+{
+ pr_debug("FCP: userspace daemon registered\n");
+ hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+}
+
static int fcopy_handle_handshake(u32 version)
{
u32 our_ver = FCOPY_CURRENT_VERSION;
@@ -94,7 +100,8 @@ static int fcopy_handle_handshake(u32 version)
break;
case FCOPY_VERSION_1:
/* Daemon expects us to reply with our own version */
- if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+ if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+ fcopy_register_done))
return -EFAULT;
dm_reg_value = version;
break;
@@ -107,8 +114,7 @@ static int fcopy_handle_handshake(u32 version)
*/
return -EINVAL;
}
- pr_debug("FCP: userspace daemon ver. %d registered\n", version);
- hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+ pr_debug("FCP: userspace daemon ver. %d connected\n", version);
return 0;
}
@@ -161,7 +167,7 @@ static void fcopy_send_data(struct work_struct *dummy)
}
fcopy_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, out_src, out_len);
+ rc = hvutil_transport_send(hvt, out_src, out_len, NULL);
if (rc) {
pr_debug("FCP: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&fcopy_timeout_work)) {
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index cb1a9160aab1..5e1fdc8d32ab 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -102,6 +102,17 @@ static void kvp_poll_wrapper(void *channel)
hv_kvp_onchannelcallback(channel);
}
+static void kvp_register_done(void)
+{
+ /*
+ * If we're still negotiating with the host cancel the timeout
+ * work to not poll the channel twice.
+ */
+ pr_debug("KVP: userspace daemon registered\n");
+ cancel_delayed_work_sync(&kvp_host_handshake_work);
+ hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
+}
+
static void
kvp_register(int reg_value)
{
@@ -116,7 +127,8 @@ kvp_register(int reg_value)
kvp_msg->kvp_hdr.operation = reg_value;
strcpy(version, HV_DRV_VERSION);
- hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg));
+ hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg),
+ kvp_register_done);
kfree(kvp_msg);
}
}
@@ -158,17 +170,10 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg)
/*
* We have a compatible daemon; complete the handshake.
*/
- pr_debug("KVP: userspace daemon ver. %d registered\n",
- KVP_OP_REGISTER);
+ pr_debug("KVP: userspace daemon ver. %d connected\n",
+ msg->kvp_hdr.operation);
kvp_register(dm_reg_value);
- /*
- * If we're still negotiating with the host cancel the timeout
- * work to not poll the channel twice.
- */
- cancel_delayed_work_sync(&kvp_host_handshake_work);
- hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
-
return 0;
}
@@ -455,7 +460,7 @@ kvp_send_key(struct work_struct *dummy)
}
kvp_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, message, sizeof(*message));
+ rc = hvutil_transport_send(hvt, message, sizeof(*message), NULL);
if (rc) {
pr_debug("KVP: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&kvp_timeout_work)) {
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 3fba14e88f03..a6707133c297 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -67,11 +67,11 @@ static const char vss_devname[] = "vmbus/hv_vss";
static __u8 *recv_buffer;
static struct hvutil_transport *hvt;
-static void vss_send_op(struct work_struct *dummy);
static void vss_timeout_func(struct work_struct *dummy);
+static void vss_handle_request(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(vss_timeout_work, vss_timeout_func);
-static DECLARE_WORK(vss_send_op_work, vss_send_op);
+static DECLARE_WORK(vss_handle_request_work, vss_handle_request);
static void vss_poll_wrapper(void *channel)
{
@@ -95,6 +95,12 @@ static void vss_timeout_func(struct work_struct *dummy)
hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
}
+static void vss_register_done(void)
+{
+ hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+ pr_debug("VSS: userspace daemon registered\n");
+}
+
static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
{
u32 our_ver = VSS_OP_REGISTER1;
@@ -105,16 +111,16 @@ static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
dm_reg_value = VSS_OP_REGISTER;
break;
case VSS_OP_REGISTER1:
- /* Daemon expects us to reply with our own version*/
- if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+ /* Daemon expects us to reply with our own version */
+ if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+ vss_register_done))
return -EFAULT;
dm_reg_value = VSS_OP_REGISTER1;
break;
default:
return -EINVAL;
}
- hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
- pr_debug("VSS: userspace daemon ver. %d registered\n", dm_reg_value);
+ pr_debug("VSS: userspace daemon ver. %d connected\n", dm_reg_value);
return 0;
}
@@ -136,6 +142,11 @@ static int vss_on_msg(void *msg, int len)
return vss_handle_handshake(vss_msg);
} else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) {
vss_transaction.state = HVUTIL_USERSPACE_RECV;
+
+ if (vss_msg->vss_hdr.operation == VSS_OP_HOT_BACKUP)
+ vss_transaction.msg->vss_cf.flags =
+ VSS_HBU_NO_AUTO_RECOVERY;
+
if (cancel_delayed_work_sync(&vss_timeout_work)) {
vss_respond_to_host(vss_msg->error);
/* Transaction is finished, reset the state. */
@@ -150,8 +161,7 @@ static int vss_on_msg(void *msg, int len)
return 0;
}
-
-static void vss_send_op(struct work_struct *dummy)
+static void vss_send_op(void)
{
int op = vss_transaction.msg->vss_hdr.operation;
int rc;
@@ -168,7 +178,10 @@ static void vss_send_op(struct work_struct *dummy)
vss_msg->vss_hdr.operation = op;
vss_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg));
+
+ schedule_delayed_work(&vss_timeout_work, VSS_USERSPACE_TIMEOUT);
+
+ rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
if (rc) {
pr_warn("VSS: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&vss_timeout_work)) {
@@ -182,6 +195,38 @@ static void vss_send_op(struct work_struct *dummy)
return;
}
+static void vss_handle_request(struct work_struct *dummy)
+{
+ switch (vss_transaction.msg->vss_hdr.operation) {
+ /*
+ * Initiate a "freeze/thaw" operation in the guest.
+ * We respond to the host once the operation is complete.
+ *
+ * We send the message to the user space daemon and the operation is
+ * performed in the daemon.
+ */
+ case VSS_OP_THAW:
+ case VSS_OP_FREEZE:
+ case VSS_OP_HOT_BACKUP:
+ if (vss_transaction.state < HVUTIL_READY) {
+ /* Userspace is not registered yet */
+ vss_respond_to_host(HV_E_FAIL);
+ return;
+ }
+ vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
+ vss_send_op();
+ return;
+ case VSS_OP_GET_DM_INFO:
+ vss_transaction.msg->dm_info.flags = 0;
+ break;
+ default:
+ break;
+ }
+
+ vss_respond_to_host(0);
+ hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+}
+
/*
* Send a response back to the host.
*/
@@ -266,48 +311,8 @@ void hv_vss_onchannelcallback(void *context)
vss_transaction.recv_req_id = requestid;
vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
- switch (vss_msg->vss_hdr.operation) {
- /*
- * Initiate a "freeze/thaw"
- * operation in the guest.
- * We respond to the host once
- * the operation is complete.
- *
- * We send the message to the
- * user space daemon and the
- * operation is performed in
- * the daemon.
- */
- case VSS_OP_FREEZE:
- case VSS_OP_THAW:
- if (vss_transaction.state < HVUTIL_READY) {
- /* Userspace is not registered yet */
- vss_respond_to_host(HV_E_FAIL);
- return;
- }
- vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
- schedule_work(&vss_send_op_work);
- schedule_delayed_work(&vss_timeout_work,
- VSS_USERSPACE_TIMEOUT);
- return;
-
- case VSS_OP_HOT_BACKUP:
- vss_msg->vss_cf.flags =
- VSS_HBU_NO_AUTO_RECOVERY;
- vss_respond_to_host(0);
- return;
-
- case VSS_OP_GET_DM_INFO:
- vss_msg->dm_info.flags = 0;
- vss_respond_to_host(0);
- return;
-
- default:
- vss_respond_to_host(0);
- return;
-
- }
-
+ schedule_work(&vss_handle_request_work);
+ return;
}
icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -358,6 +363,6 @@ void hv_vss_deinit(void)
{
vss_transaction.state = HVUTIL_DEVICE_DYING;
cancel_delayed_work_sync(&vss_timeout_work);
- cancel_work_sync(&vss_send_op_work);
+ cancel_work_sync(&vss_handle_request_work);
hvutil_transport_destroy(hvt);
}
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index d5acaa2d8e61..4aa3cb63fd41 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -34,22 +34,25 @@
#define SD_MINOR 0
#define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
-#define SD_WS2008_MAJOR 1
-#define SD_WS2008_VERSION (SD_WS2008_MAJOR << 16 | SD_MINOR)
+#define SD_MAJOR_1 1
+#define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
-#define TS_MAJOR 3
+#define TS_MAJOR 4
#define TS_MINOR 0
#define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
-#define TS_WS2008_MAJOR 1
-#define TS_WS2008_VERSION (TS_WS2008_MAJOR << 16 | TS_MINOR)
+#define TS_MAJOR_1 1
+#define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
+
+#define TS_MAJOR_3 3
+#define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
#define HB_MAJOR 3
-#define HB_MINOR 0
+#define HB_MINOR 0
#define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
-#define HB_WS2008_MAJOR 1
-#define HB_WS2008_VERSION (HB_WS2008_MAJOR << 16 | HB_MINOR)
+#define HB_MAJOR_1 1
+#define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
static int sd_srv_version;
static int ts_srv_version;
@@ -61,9 +64,14 @@ static struct hv_util_service util_shutdown = {
.util_cb = shutdown_onchannelcallback,
};
+static int hv_timesync_init(struct hv_util_service *srv);
+static void hv_timesync_deinit(void);
+
static void timesync_onchannelcallback(void *context);
static struct hv_util_service util_timesynch = {
.util_cb = timesync_onchannelcallback,
+ .util_init = hv_timesync_init,
+ .util_deinit = hv_timesync_deinit,
};
static void heartbeat_onchannelcallback(void *context);
@@ -161,35 +169,43 @@ static void shutdown_onchannelcallback(void *context)
}
/*
- * Set guest time to host UTC time.
- */
-static inline void do_adj_guesttime(u64 hosttime)
-{
- s64 host_tns;
- struct timespec host_ts;
-
- host_tns = (hosttime - WLTIMEDELTA) * 100;
- host_ts = ns_to_timespec(host_tns);
-
- do_settimeofday(&host_ts);
-}
-
-/*
* Set the host time in a process context.
*/
struct adj_time_work {
struct work_struct work;
u64 host_time;
+ u64 ref_time;
+ u8 flags;
};
static void hv_set_host_time(struct work_struct *work)
{
struct adj_time_work *wrk;
+ s64 host_tns;
+ u64 newtime;
+ struct timespec host_ts;
wrk = container_of(work, struct adj_time_work, work);
- do_adj_guesttime(wrk->host_time);
- kfree(wrk);
+
+ newtime = wrk->host_time;
+ if (ts_srv_version > TS_VERSION_3) {
+ /*
+ * Some latency has been introduced since Hyper-V generated
+ * its time sample. Take that latency into account before
+ * using TSC reference time sample from Hyper-V.
+ *
+ * This sample is given by TimeSync v4 and above hosts.
+ */
+ u64 current_tick;
+
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+ newtime += (current_tick - wrk->ref_time);
+ }
+ host_tns = (newtime - WLTIMEDELTA) * 100;
+ host_ts = ns_to_timespec(host_tns);
+
+ do_settimeofday(&host_ts);
}
/*
@@ -198,33 +214,31 @@ static void hv_set_host_time(struct work_struct *work)
* ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
* After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
* message after the timesync channel is opened. Since the hv_utils module is
- * loaded after hv_vmbus, the first message is usually missed. The other
- * thing is, systime is automatically set to emulated hardware clock which may
- * not be UTC time or in the same time zone. So, to override these effects, we
- * use the first 50 time samples for initial system time setting.
+ * loaded after hv_vmbus, the first message is usually missed. This bit is
+ * considered a hard request to discipline the clock.
+ *
+ * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
+ * typically used as a hint to the guest. The guest is under no obligation
+ * to discipline the clock.
*/
-static inline void adj_guesttime(u64 hosttime, u8 flags)
+static struct adj_time_work wrk;
+static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 flags)
{
- struct adj_time_work *wrk;
- static s32 scnt = 50;
- wrk = kmalloc(sizeof(struct adj_time_work), GFP_ATOMIC);
- if (wrk == NULL)
+ /*
+ * This check is safe since we are executing in the
+ * interrupt context and time synch messages are always
+ * delivered on the same CPU.
+ */
+ if (work_pending(&wrk.work))
return;
- wrk->host_time = hosttime;
- if ((flags & ICTIMESYNCFLAG_SYNC) != 0) {
- INIT_WORK(&wrk->work, hv_set_host_time);
- schedule_work(&wrk->work);
- return;
+ wrk.host_time = hosttime;
+ wrk.ref_time = reftime;
+ wrk.flags = flags;
+ if ((flags & (ICTIMESYNCFLAG_SYNC | ICTIMESYNCFLAG_SAMPLE)) != 0) {
+ schedule_work(&wrk.work);
}
-
- if ((flags & ICTIMESYNCFLAG_SAMPLE) != 0 && scnt > 0) {
- scnt--;
- INIT_WORK(&wrk->work, hv_set_host_time);
- schedule_work(&wrk->work);
- } else
- kfree(wrk);
}
/*
@@ -237,6 +251,7 @@ static void timesync_onchannelcallback(void *context)
u64 requestid;
struct icmsg_hdr *icmsghdrp;
struct ictimesync_data *timedatap;
+ struct ictimesync_ref_data *refdata;
u8 *time_txf_buf = util_timesynch.recv_buffer;
struct icmsg_negotiate *negop = NULL;
@@ -252,11 +267,27 @@ static void timesync_onchannelcallback(void *context)
time_txf_buf,
util_fw_version,
ts_srv_version);
+ pr_info("Using TimeSync version %d.%d\n",
+ ts_srv_version >> 16, ts_srv_version & 0xFFFF);
} else {
- timedatap = (struct ictimesync_data *)&time_txf_buf[
- sizeof(struct vmbuspipe_hdr) +
- sizeof(struct icmsg_hdr)];
- adj_guesttime(timedatap->parenttime, timedatap->flags);
+ if (ts_srv_version > TS_VERSION_3) {
+ refdata = (struct ictimesync_ref_data *)
+ &time_txf_buf[
+ sizeof(struct vmbuspipe_hdr) +
+ sizeof(struct icmsg_hdr)];
+
+ adj_guesttime(refdata->parenttime,
+ refdata->vmreferencetime,
+ refdata->flags);
+ } else {
+ timedatap = (struct ictimesync_data *)
+ &time_txf_buf[
+ sizeof(struct vmbuspipe_hdr) +
+ sizeof(struct icmsg_hdr)];
+ adj_guesttime(timedatap->parenttime,
+ 0,
+ timedatap->flags);
+ }
}
icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -350,16 +381,21 @@ static int util_probe(struct hv_device *dev,
switch (vmbus_proto_version) {
case (VERSION_WS2008):
util_fw_version = UTIL_WS2K8_FW_VERSION;
- sd_srv_version = SD_WS2008_VERSION;
- ts_srv_version = TS_WS2008_VERSION;
- hb_srv_version = HB_WS2008_VERSION;
+ sd_srv_version = SD_VERSION_1;
+ ts_srv_version = TS_VERSION_1;
+ hb_srv_version = HB_VERSION_1;
break;
-
- default:
+ case(VERSION_WIN10):
util_fw_version = UTIL_FW_VERSION;
sd_srv_version = SD_VERSION;
ts_srv_version = TS_VERSION;
hb_srv_version = HB_VERSION;
+ break;
+ default:
+ util_fw_version = UTIL_FW_VERSION;
+ sd_srv_version = SD_VERSION;
+ ts_srv_version = TS_VERSION_3;
+ hb_srv_version = HB_VERSION;
}
ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
@@ -427,6 +463,17 @@ static struct hv_driver util_drv = {
.remove = util_remove,
};
+static int hv_timesync_init(struct hv_util_service *srv)
+{
+ INIT_WORK(&wrk.work, hv_set_host_time);
+ return 0;
+}
+
+static void hv_timesync_deinit(void)
+{
+ cancel_work_sync(&wrk.work);
+}
+
static int __init init_hyperv_utils(void)
{
pr_info("Registering HyperV Utility Driver\n");
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index 9a9983fa4531..c235a9515267 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -72,6 +72,10 @@ static ssize_t hvt_op_read(struct file *file, char __user *buf,
hvt->outmsg = NULL;
hvt->outmsg_len = 0;
+ if (hvt->on_read)
+ hvt->on_read();
+ hvt->on_read = NULL;
+
out_unlock:
mutex_unlock(&hvt->lock);
return ret;
@@ -219,7 +223,8 @@ static void hvt_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
mutex_unlock(&hvt->lock);
}
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+ void (*on_read_cb)(void))
{
struct cn_msg *cn_msg;
int ret = 0;
@@ -237,6 +242,13 @@ int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
memcpy(cn_msg->data, msg, len);
ret = cn_netlink_send(cn_msg, 0, 0, GFP_ATOMIC);
kfree(cn_msg);
+ /*
+ * We don't know when netlink messages are delivered but unlike
+ * in CHARDEV mode we're not blocked and we can send next
+ * messages right away.
+ */
+ if (on_read_cb)
+ on_read_cb();
return ret;
}
/* HVUTIL_TRANSPORT_CHARDEV */
@@ -255,6 +267,7 @@ int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
if (hvt->outmsg) {
memcpy(hvt->outmsg, msg, len);
hvt->outmsg_len = len;
+ hvt->on_read = on_read_cb;
wake_up_interruptible(&hvt->outmsg_q);
} else
ret = -ENOMEM;
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h
index 06254a165a18..d98f5225c3e6 100644
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -36,6 +36,7 @@ struct hvutil_transport {
struct list_head list; /* hvt_list */
int (*on_msg)(void *, int); /* callback on new user message */
void (*on_reset)(void); /* callback when userspace drops */
+ void (*on_read)(void); /* callback on message read */
u8 *outmsg; /* message to the userspace */
int outmsg_len; /* its length */
wait_queue_head_t outmsg_q; /* poll/read wait queue */
@@ -46,7 +47,8 @@ struct hvutil_transport *hvutil_transport_init(const char *name,
u32 cn_idx, u32 cn_val,
int (*on_msg)(void *, int),
void (*on_reset)(void));
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len);
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+ void (*on_read_cb)(void));
void hvutil_transport_destroy(struct hvutil_transport *hvt);
#endif /* _HV_UTILS_TRANSPORT_H */
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 718b5c72f0c8..a5b4442433c8 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -495,7 +495,7 @@ struct hv_ring_buffer_debug_info {
extern int hv_init(void);
-extern void hv_cleanup(void);
+extern void hv_cleanup(bool crash);
extern int hv_post_message(union hv_connection_id connection_id,
enum hv_message_type message_type,
@@ -522,14 +522,15 @@ extern unsigned int host_info_edx;
/* Interface */
-int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer,
- u32 buflen);
+int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+ struct page *pages, u32 pagecnt);
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info,
struct kvec *kv_list,
- u32 kv_count, bool *signal, bool lock);
+ u32 kv_count, bool *signal, bool lock,
+ enum hv_signal_policy policy);
int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
void *buffer, u32 buflen, u32 *buffer_actual_len,
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index fe586bf74e17..08043da1a61c 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -27,6 +27,8 @@
#include <linux/mm.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include "hyperv_vmbus.h"
@@ -66,12 +68,20 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi)
* arrived.
*/
-static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
+static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi,
+ enum hv_signal_policy policy)
{
virt_mb();
if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
return false;
+ /*
+ * When the client wants to control signaling,
+ * we only honour the host interrupt mask.
+ */
+ if (policy == HV_SIGNAL_POLICY_EXPLICIT)
+ return true;
+
/* check interrupt_mask before read_index */
virt_rmb();
/*
@@ -162,18 +172,7 @@ static u32 hv_copyfrom_ringbuffer(
void *ring_buffer = hv_get_ring_buffer(ring_info);
u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
- u32 frag_len;
-
- /* wrap-around detected at the src */
- if (destlen > ring_buffer_size - start_read_offset) {
- frag_len = ring_buffer_size - start_read_offset;
-
- memcpy(dest, ring_buffer + start_read_offset, frag_len);
- memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
- } else
-
- memcpy(dest, ring_buffer + start_read_offset, destlen);
-
+ memcpy(dest, ring_buffer + start_read_offset, destlen);
start_read_offset += destlen;
start_read_offset %= ring_buffer_size;
@@ -194,15 +193,8 @@ static u32 hv_copyto_ringbuffer(
{
void *ring_buffer = hv_get_ring_buffer(ring_info);
u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
- u32 frag_len;
- /* wrap-around detected! */
- if (srclen > ring_buffer_size - start_write_offset) {
- frag_len = ring_buffer_size - start_write_offset;
- memcpy(ring_buffer + start_write_offset, src, frag_len);
- memcpy(ring_buffer, src + frag_len, srclen - frag_len);
- } else
- memcpy(ring_buffer + start_write_offset, src, srclen);
+ memcpy(ring_buffer + start_write_offset, src, srclen);
start_write_offset += srclen;
start_write_offset %= ring_buffer_size;
@@ -235,22 +227,46 @@ void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
/* Initialize the ring buffer. */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
- void *buffer, u32 buflen)
+ struct page *pages, u32 page_cnt)
{
- if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
- return -EINVAL;
+ int i;
+ struct page **pages_wraparound;
+
+ BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
- ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
+ /*
+ * First page holds struct hv_ring_buffer, do wraparound mapping for
+ * the rest.
+ */
+ pages_wraparound = kzalloc(sizeof(struct page *) * (page_cnt * 2 - 1),
+ GFP_KERNEL);
+ if (!pages_wraparound)
+ return -ENOMEM;
+
+ pages_wraparound[0] = pages;
+ for (i = 0; i < 2 * (page_cnt - 1); i++)
+ pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+
+ ring_info->ring_buffer = (struct hv_ring_buffer *)
+ vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+
+ kfree(pages_wraparound);
+
+
+ if (!ring_info->ring_buffer)
+ return -ENOMEM;
+
ring_info->ring_buffer->read_index =
ring_info->ring_buffer->write_index = 0;
/* Set the feature bit for enabling flow control. */
ring_info->ring_buffer->feature_bits.value = 1;
- ring_info->ring_size = buflen;
- ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
+ ring_info->ring_size = page_cnt << PAGE_SHIFT;
+ ring_info->ring_datasize = ring_info->ring_size -
+ sizeof(struct hv_ring_buffer);
spin_lock_init(&ring_info->ring_lock);
@@ -260,11 +276,13 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
/* Cleanup the ring buffer. */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
+ vunmap(ring_info->ring_buffer);
}
/* Write to the ring buffer. */
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
- struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock,
+ enum hv_signal_policy policy)
{
int i = 0;
u32 bytes_avail_towrite;
@@ -326,7 +344,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
if (lock)
spin_unlock_irqrestore(&outring_info->ring_lock, flags);
- *signal = hv_need_to_signal(old_write, outring_info);
+ *signal = hv_need_to_signal(old_write, outring_info, policy);
return 0;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e82f7e1c217c..a259e18d22d5 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -105,8 +105,8 @@ static struct notifier_block hyperv_panic_block = {
static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
-struct resource *hyperv_mmio;
-DEFINE_SEMAPHORE(hyperv_mmio_lock);
+static struct resource *hyperv_mmio;
+static DEFINE_SEMAPHORE(hyperv_mmio_lock);
static int vmbus_exists(void)
{
@@ -874,7 +874,7 @@ err_alloc:
bus_unregister(&hv_bus);
err_cleanup:
- hv_cleanup();
+ hv_cleanup(false);
return ret;
}
@@ -961,8 +961,8 @@ int vmbus_device_register(struct hv_device *child_device_obj)
{
int ret = 0;
- dev_set_name(&child_device_obj->device, "vmbus_%d",
- child_device_obj->channel->id);
+ dev_set_name(&child_device_obj->device, "vmbus-%pUl",
+ child_device_obj->channel->offermsg.offer.if_instance.b);
child_device_obj->device.bus = &hv_bus;
child_device_obj->device.parent = &hv_acpi_dev->dev;
@@ -1326,7 +1326,7 @@ static void hv_kexec_handler(void)
vmbus_initiate_unload(false);
for_each_online_cpu(cpu)
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
- hv_cleanup();
+ hv_cleanup(false);
};
static void hv_crash_handler(struct pt_regs *regs)
@@ -1338,7 +1338,7 @@ static void hv_crash_handler(struct pt_regs *regs)
* for kdump.
*/
hv_synic_cleanup(NULL);
- hv_cleanup();
+ hv_cleanup(true);
};
static int __init hv_acpi_init(void)
@@ -1398,7 +1398,7 @@ static void __exit vmbus_exit(void)
&hyperv_panic_block);
}
bus_unregister(&hv_bus);
- hv_cleanup();
+ hv_cleanup(false);
for_each_online_cpu(cpu) {
tasklet_kill(hv_context.event_dpc[cpu]);
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);