drivers/hv/hv_balloon.c

   1 /*
   2  * Copyright (c) 2012, Microsoft Corporation.
   3  *
   4  * Author:
   5  *   K. Y. Srinivasan <kys@microsoft.com>
   6  *
   7  * This program is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 as published
   9  * by the Free Software Foundation.
  10  *
  11  * This program is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  14  * NON INFRINGEMENT.  See the GNU General Public License for more
  15  * details.
  16  *
  17  */
  18
  19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21 #include <linux/kernel.h>
  22 #include <linux/mman.h>
  23 #include <linux/delay.h>
  24 #include <linux/init.h>
  25 #include <linux/module.h>
  26 #include <linux/slab.h>
  27 #include <linux/kthread.h>
  28 #include <linux/completion.h>
  29 #include <linux/memory_hotplug.h>
  30 #include <linux/memory.h>
  31 #include <linux/notifier.h>
  32 #include <linux/mman.h>
  33 #include <linux/percpu_counter.h>
  34
  35 #include <linux/hyperv.h>
  36
  37 /*
  38  * We begin with definitions supporting the Dynamic Memory protocol
  39  * with the host.
  40  *
  41  * Begin protocol definitions.
  42  */
  43
  44
  45
  46 /*
  47  * Protocol versions. The low word is the minor version, the high word the major
  48  * version.
  49  *
  50  * History:
  51  * Initial version 1.0
  52  * Changed to 0.1 on 2009/03/25
  53  * Changes to 0.2 on 2009/05/14
  54  * Changes to 0.3 on 2009/12/03
  55  * Changed to 1.0 on 2011/04/05
  56  */
  57
  58 #define DYNMEM_MAKE_VERSION(Major, Minor) ((__u32)(((Major) << 16) | (Minor)))
  59 #define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16)
  60 #define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff)
  61
  62 enum {
  63         DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
  64         DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
  65
  66         DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
  67         DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
  68
  69         DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN8
  70 };
  71
  72
  73
  74 /*
  75  * Message Types
  76  */
  77
  78 enum dm_message_type {
  79         /*
  80          * Version 0.3
  81          */
  82         DM_ERROR                        = 0,
  83         DM_VERSION_REQUEST              = 1,
  84         DM_VERSION_RESPONSE             = 2,
  85         DM_CAPABILITIES_REPORT          = 3,
  86         DM_CAPABILITIES_RESPONSE        = 4,
  87         DM_STATUS_REPORT                = 5,
  88         DM_BALLOON_REQUEST              = 6,
  89         DM_BALLOON_RESPONSE             = 7,
  90         DM_UNBALLOON_REQUEST            = 8,
  91         DM_UNBALLOON_RESPONSE           = 9,
  92         DM_MEM_HOT_ADD_REQUEST          = 10,
  93         DM_MEM_HOT_ADD_RESPONSE         = 11,
  94         DM_VERSION_03_MAX               = 11,
  95         /*
  96          * Version 1.0.
  97          */
  98         DM_INFO_MESSAGE                 = 12,
  99         DM_VERSION_1_MAX                = 12
 100 };
 101
 102
 103 /*
 104  * Structures defining the dynamic memory management
 105  * protocol.
 106  */
 107
 108 union dm_version {
 109         struct {
 110                 __u16 minor_version;
 111                 __u16 major_version;
 112         };
 113         __u32 version;
 114 } __packed;
 115
 116
 117 union dm_caps {
 118         struct {
 119                 __u64 balloon:1;
 120                 __u64 hot_add:1;
 121                 __u64 reservedz:62;
 122         } cap_bits;
 123         __u64 caps;
 124 } __packed;
 125
 126 union dm_mem_page_range {
 127         struct  {
 128                 /*
 129                  * The PFN number of the first page in the range.
 130                  * 40 bits is the architectural limit of a PFN
 131                  * number for AMD64.
 132                  */
 133                 __u64 start_page:40;
 134                 /*
 135                  * The number of pages in the range.
 136                  */
 137                 __u64 page_cnt:24;
 138         } finfo;
 139         __u64  page_range;
 140 } __packed;
 141
 142
 143
 144 /*
 145  * The header for all dynamic memory messages:
 146  *
 147  * type: Type of the message.
 148  * size: Size of the message in bytes; including the header.
 149  * trans_id: The guest is responsible for manufacturing this ID.
 150  */
 151
 152 struct dm_header {
 153         __u16 type;
 154         __u16 size;
 155         __u32 trans_id;
 156 } __packed;
 157
 158 /*
 159  * A generic message format for dynamic memory.
 160  * Specific message formats are defined later in the file.
 161  */
 162
 163 struct dm_message {
 164         struct dm_header hdr;
 165         __u8 data[]; /* enclosed message */
 166 } __packed;
 167
 168
 169 /*
 170  * Specific message types supporting the dynamic memory protocol.
 171  */
 172
 173 /*
 174  * Version negotiation message. Sent from the guest to the host.
 175  * The guest is free to try different versions until the host
 176  * accepts the version.
 177  *
 178  * dm_version: The protocol version requested.
 179  * is_last_attempt: If TRUE, this is the last version guest will request.
 180  * reservedz: Reserved field, set to zero.
 181  */
 182
 183 struct dm_version_request {
 184         struct dm_header hdr;
 185         union dm_version version;
 186         __u32 is_last_attempt:1;
 187         __u32 reservedz:31;
 188 } __packed;
 189
 190 /*
 191  * Version response message; Host to Guest and indicates
 192  * if the host has accepted the version sent by the guest.
 193  *
 194  * is_accepted: If TRUE, host has accepted the version and the guest
 195  * should proceed to the next stage of the protocol. FALSE indicates that
 196  * guest should re-try with a different version.
 197  *
 198  * reservedz: Reserved field, set to zero.
 199  */
 200
 201 struct dm_version_response {
 202         struct dm_header hdr;
 203         __u64 is_accepted:1;
 204         __u64 reservedz:63;
 205 } __packed;
 206
 207 /*
 208  * Message reporting capabilities. This is sent from the guest to the
 209  * host.
 210  */
 211
 212 struct dm_capabilities {
 213         struct dm_header hdr;
 214         union dm_caps caps;
 215         __u64 min_page_cnt;
 216         __u64 max_page_number;
 217 } __packed;
 218
 219 /*
 220  * Response to the capabilities message. This is sent from the host to the
 221  * guest. This message notifies if the host has accepted the guest's
 222  * capabilities. If the host has not accepted, the guest must shutdown
 223  * the service.
 224  *
 225  * is_accepted: Indicates if the host has accepted guest's capabilities.
 226  * reservedz: Must be 0.
 227  */
 228
 229 struct dm_capabilities_resp_msg {
 230         struct dm_header hdr;
 231         __u64 is_accepted:1;
 232         __u64 reservedz:63;
 233 } __packed;
 234
 235 /*
 236  * This message is used to report memory pressure from the guest.
 237  * This message is not part of any transaction and there is no
 238  * response to this message.
 239  *
 240  * num_avail: Available memory in pages.
 241  * num_committed: Committed memory in pages.
 242  * page_file_size: The accumulated size of all page files
 243  *                 in the system in pages.
 244  * zero_free: The nunber of zero and free pages.
 245  * page_file_writes: The writes to the page file in pages.
 246  * io_diff: An indicator of file cache efficiency or page file activity,
 247  *          calculated as File Cache Page Fault Count - Page Read Count.
 248  *          This value is in pages.
 249  *
 250  * Some of these metrics are Windows specific and fortunately
 251  * the algorithm on the host side that computes the guest memory
 252  * pressure only uses num_committed value.
 253  */
 254
 255 struct dm_status {
 256         struct dm_header hdr;
 257         __u64 num_avail;
 258         __u64 num_committed;
 259         __u64 page_file_size;
 260         __u64 zero_free;
 261         __u32 page_file_writes;
 262         __u32 io_diff;
 263 } __packed;
 264
 265
 266 /*
 267  * Message to ask the guest to allocate memory - balloon up message.
 268  * This message is sent from the host to the guest. The guest may not be
 269  * able to allocate as much memory as requested.
 270  *
 271  * num_pages: number of pages to allocate.
 272  */
 273
 274 struct dm_balloon {
 275         struct dm_header hdr;
 276         __u32 num_pages;
 277         __u32 reservedz;
 278 } __packed;
 279
 280
 281 /*
 282  * Balloon response message; this message is sent from the guest
 283  * to the host in response to the balloon message.
 284  *
 285  * reservedz: Reserved; must be set to zero.
 286  * more_pages: If FALSE, this is the last message of the transaction.
 287  * if TRUE there will atleast one more message from the guest.
 288  *
 289  * range_count: The number of ranges in the range array.
 290  *
 291  * range_array: An array of page ranges returned to the host.
 292  *
 293  */
 294
 295 struct dm_balloon_response {
 296         struct dm_header hdr;
 297         __u32 reservedz;
 298         __u32 more_pages:1;
 299         __u32 range_count:31;
 300         union dm_mem_page_range range_array[];
 301 } __packed;
 302
 303 /*
 304  * Un-balloon message; this message is sent from the host
 305  * to the guest to give guest more memory.
 306  *
 307  * more_pages: If FALSE, this is the last message of the transaction.
 308  * if TRUE there will atleast one more message from the guest.
 309  *
 310  * reservedz: Reserved; must be set to zero.
 311  *
 312  * range_count: The number of ranges in the range array.
 313  *
 314  * range_array: An array of page ranges returned to the host.
 315  *
 316  */
 317
 318 struct dm_unballoon_request {
 319         struct dm_header hdr;
 320         __u32 more_pages:1;
 321         __u32 reservedz:31;
 322         __u32 range_count;
 323         union dm_mem_page_range range_array[];
 324 } __packed;
 325
 326 /*
 327  * Un-balloon response message; this message is sent from the guest
 328  * to the host in response to an unballoon request.
 329  *
 330  */
 331
 332 struct dm_unballoon_response {
 333         struct dm_header hdr;
 334 } __packed;
 335
 336
 337 /*
 338  * Hot add request message. Message sent from the host to the guest.
 339  *
 340  * mem_range: Memory range to hot add.
 341  *
 342  * On Linux we currently don't support this since we cannot hot add
 343  * arbitrary granularity of memory.
 344  */
 345
 346 struct dm_hot_add {
 347         struct dm_header hdr;
 348         union dm_mem_page_range range;
 349 } __packed;
 350
 351 /*
 352  * Hot add response message.
 353  * This message is sent by the guest to report the status of a hot add request.
 354  * If page_count is less than the requested page count, then the host should
 355  * assume all further hot add requests will fail, since this indicates that
 356  * the guest has hit an upper physical memory barrier.
 357  *
 358  * Hot adds may also fail due to low resources; in this case, the guest must
 359  * not complete this message until the hot add can succeed, and the host must
 360  * not send a new hot add request until the response is sent.
 361  * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS
 362  * times it fails the request.
 363  *
 364  *
 365  * page_count: number of pages that were successfully hot added.
 366  *
 367  * result: result of the operation 1: success, 0: failure.
 368  *
 369  */
 370
 371 struct dm_hot_add_response {
 372         struct dm_header hdr;
 373         __u32 page_count;
 374         __u32 result;
 375 } __packed;
 376
 377 /*
 378  * Types of information sent from host to the guest.
 379  */
 380
 381 enum dm_info_type {
 382         INFO_TYPE_MAX_PAGE_CNT = 0,
 383         MAX_INFO_TYPE
 384 };
 385
 386
 387 /*
 388  * Header for the information message.
 389  */
 390
 391 struct dm_info_header {
 392         enum dm_info_type type;
 393         __u32 data_size;
 394 } __packed;
 395
 396 /*
 397  * This message is sent from the host to the guest to pass
 398  * some relevant information (win8 addition).
 399  *
 400  * reserved: no used.
 401  * info_size: size of the information blob.
 402  * info: information blob.
 403  */
 404
 405 struct dm_info_msg {
 406         struct dm_info_header header;
 407         __u32 reserved;
 408         __u32 info_size;
 409         __u8  info[];
 410 };
 411
 412 /*
 413  * End protocol definitions.
 414  */
 415
 416 static bool hot_add;
 417 static bool do_hot_add;
 418
 419 module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
 420 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
 421
 422 static atomic_t trans_id = ATOMIC_INIT(0);
 423
 424 static int dm_ring_size = (5 * PAGE_SIZE);
 425
 426 /*
 427  * Driver specific state.
 428  */
 429
 430 enum hv_dm_state {
 431         DM_INITIALIZING = 0,
 432         DM_INITIALIZED,
 433         DM_BALLOON_UP,
 434         DM_BALLOON_DOWN,
 435         DM_HOT_ADD,
 436         DM_INIT_ERROR
 437 };
 438
 439
 440 static __u8 recv_buffer[PAGE_SIZE];
 441 static __u8 *send_buffer;
 442 #define PAGES_IN_2M     512
 443
 444 struct hv_dynmem_device {
 445         struct hv_device *dev;
 446         enum hv_dm_state state;
 447         struct completion host_event;
 448         struct completion config_event;
 449
 450         /*
 451          * Number of pages we have currently ballooned out.
 452          */
 453         unsigned int num_pages_ballooned;
 454
 455         /*
 456          * This thread handles both balloon/hot-add
 457          * requests from the host as well as notifying
 458          * the host with regards to memory pressure in
 459          * the guest.
 460          */
 461         struct task_struct *thread;
 462
 463         /*
 464          * We start with the highest version we can support
 465          * and downgrade based on the host; we save here the
 466          * next version to try.
 467          */
 468         __u32 next_version;
 469 };
 470
 471 static struct hv_dynmem_device dm_device;
 472
 473 static void hot_add_req(struct hv_dynmem_device *dm, struct dm_hot_add *msg)
 474 {
 475
 476         struct dm_hot_add_response resp;
 477
 478         if (do_hot_add) {
 479
 480                 pr_info("Memory hot add not supported\n");
 481
 482                 /*
 483                  * Currently we do not support hot add.
 484                  * Just fail the request.
 485                  */
 486         }
 487
 488         memset(&resp, 0, sizeof(struct dm_hot_add_response));
 489         resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
 490         resp.hdr.size = sizeof(struct dm_hot_add_response);
 491         resp.hdr.trans_id = atomic_inc_return(&trans_id);
 492
 493         resp.page_count = 0;
 494         resp.result = 0;
 495
 496         dm->state = DM_INITIALIZED;
 497         vmbus_sendpacket(dm->dev->channel, &resp,
 498                         sizeof(struct dm_hot_add_response),
 499                         (unsigned long)NULL,
 500                         VM_PKT_DATA_INBAND, 0);
 501
 502 }
 503
 504 static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
 505 {
 506         switch (msg->header.type) {
 507         case INFO_TYPE_MAX_PAGE_CNT:
 508                 pr_info("Received INFO_TYPE_MAX_PAGE_CNT\n");
 509                 pr_info("Data Size is %d\n", msg->header.data_size);
 510                 break;
 511         default:
 512                 pr_info("Received Unknown type: %d\n", msg->header.type);
 513         }
 514 }
 515
 516 /*
 517  * Post our status as it relates memory pressure to the
 518  * host. Host expects the guests to post this status
 519  * periodically at 1 second intervals.
 520  *
 521  * The metrics specified in this protocol are very Windows
 522  * specific and so we cook up numbers here to convey our memory
 523  * pressure.
 524  */
 525
 526 static void post_status(struct hv_dynmem_device *dm)
 527 {
 528         struct dm_status status;
 529
 530
 531         memset(&status, 0, sizeof(struct dm_status));
 532         status.hdr.type = DM_STATUS_REPORT;
 533         status.hdr.size = sizeof(struct dm_status);
 534         status.hdr.trans_id = atomic_inc_return(&trans_id);
 535
 536
 537         status.num_committed = vm_memory_committed();
 538
 539         vmbus_sendpacket(dm->dev->channel, &status,
 540                                 sizeof(struct dm_status),
 541                                 (unsigned long)NULL,
 542                                 VM_PKT_DATA_INBAND, 0);
 543
 544 }
 545
 546
 547
 548 static void free_balloon_pages(struct hv_dynmem_device *dm,
 549                          union dm_mem_page_range *range_array)
 550 {
 551         int num_pages = range_array->finfo.page_cnt;
 552         __u64 start_frame = range_array->finfo.start_page;
 553         struct page *pg;
 554         int i;
 555
 556         for (i = 0; i < num_pages; i++) {
 557                 pg = pfn_to_page(i + start_frame);
 558                 __free_page(pg);
 559                 dm->num_pages_ballooned--;
 560         }
 561 }
 562
 563
 564
 565 static int  alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 566                          struct dm_balloon_response *bl_resp, int alloc_unit,
 567                          bool *alloc_error)
 568 {
 569         int i = 0;
 570         struct page *pg;
 571
 572         if (num_pages < alloc_unit)
 573                 return 0;
 574
 575         for (i = 0; (i * alloc_unit) < num_pages; i++) {
 576                 if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
 577                         PAGE_SIZE)
 578                         return i * alloc_unit;
 579
 580                 /*
 581                  * We execute this code in a thread context. Furthermore,
 582                  * we don't want the kernel to try too hard.
 583                  */
 584                 pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY |
 585                                 __GFP_NOMEMALLOC | __GFP_NOWARN,
 586                                 get_order(alloc_unit << PAGE_SHIFT));
 587
 588                 if (!pg) {
 589                         *alloc_error = true;
 590                         return i * alloc_unit;
 591                 }
 592
 593
 594                 dm->num_pages_ballooned += alloc_unit;
 595
 596                 bl_resp->range_count++;
 597                 bl_resp->range_array[i].finfo.start_page =
 598                         page_to_pfn(pg);
 599                 bl_resp->range_array[i].finfo.page_cnt = alloc_unit;
 600                 bl_resp->hdr.size += sizeof(union dm_mem_page_range);
 601
 602         }
 603
 604         return num_pages;
 605 }
 606
 607
 608
 609 static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
 610 {
 611         int num_pages = req->num_pages;
 612         int num_ballooned = 0;
 613         struct dm_balloon_response *bl_resp;
 614         int alloc_unit;
 615         int ret;
 616         bool alloc_error = false;
 617         bool done = false;
 618         int i;
 619
 620
 621         /*
 622          * Currently, we only support 4k allocations.
 623          */
 624         alloc_unit = 1;
 625
 626         while (!done) {
 627                 bl_resp = (struct dm_balloon_response *)send_buffer;
 628                 memset(send_buffer, 0, PAGE_SIZE);
 629                 bl_resp->hdr.type = DM_BALLOON_RESPONSE;
 630                 bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
 631                 bl_resp->hdr.size = sizeof(struct dm_balloon_response);
 632                 bl_resp->more_pages = 1;
 633
 634
 635                 num_pages -= num_ballooned;
 636                 num_ballooned = alloc_balloon_pages(dm, num_pages,
 637                                                 bl_resp, alloc_unit,
 638                                                  &alloc_error);
 639
 640                 if ((alloc_error) || (num_ballooned == num_pages)) {
 641                         bl_resp->more_pages = 0;
 642                         done = true;
 643                         dm->state = DM_INITIALIZED;
 644                 }
 645
 646                 /*
 647                  * We are pushing a lot of data through the channel;
 648                  * deal with transient failures caused because of the
 649                  * lack of space in the ring buffer.
 650                  */
 651
 652                 do {
 653                         ret = vmbus_sendpacket(dm_device.dev->channel,
 654                                                 bl_resp,
 655                                                 bl_resp->hdr.size,
 656                                                 (unsigned long)NULL,
 657                                                 VM_PKT_DATA_INBAND, 0);
 658
 659                         if (ret == -EAGAIN)
 660                                 msleep(20);
 661
 662                 } while (ret == -EAGAIN);
 663
 664                 if (ret) {
 665                         /*
 666                          * Free up the memory we allocatted.
 667                          */
 668                         pr_info("Balloon response failed\n");
 669
 670                         for (i = 0; i < bl_resp->range_count; i++)
 671                                 free_balloon_pages(dm,
 672                                                  &bl_resp->range_array[i]);
 673
 674                         done = true;
 675                 }
 676         }
 677
 678 }
 679
 680 static void balloon_down(struct hv_dynmem_device *dm,
 681                         struct dm_unballoon_request *req)
 682 {
 683         union dm_mem_page_range *range_array = req->range_array;
 684         int range_count = req->range_count;
 685         struct dm_unballoon_response resp;
 686         int i;
 687
 688         for (i = 0; i < range_count; i++)
 689                 free_balloon_pages(dm, &range_array[i]);
 690
 691         if (req->more_pages == 1)
 692                 return;
 693
 694         memset(&resp, 0, sizeof(struct dm_unballoon_response));
 695         resp.hdr.type = DM_UNBALLOON_RESPONSE;
 696         resp.hdr.trans_id = atomic_inc_return(&trans_id);
 697         resp.hdr.size = sizeof(struct dm_unballoon_response);
 698
 699         vmbus_sendpacket(dm_device.dev->channel, &resp,
 700                                 sizeof(struct dm_unballoon_response),
 701                                 (unsigned long)NULL,
 702                                 VM_PKT_DATA_INBAND, 0);
 703
 704         dm->state = DM_INITIALIZED;
 705 }
 706
 707 static void balloon_onchannelcallback(void *context);
 708
 709 static int dm_thread_func(void *dm_dev)
 710 {
 711         struct hv_dynmem_device *dm = dm_dev;
 712         int t;
 713         unsigned long  scan_start;
 714
 715         while (!kthread_should_stop()) {
 716                 t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
 717                 /*
 718                  * The host expects us to post information on the memory
 719                  * pressure every second.
 720                  */
 721
 722                 if (t == 0)
 723                         post_status(dm);
 724
 725                 scan_start = jiffies;
 726                 switch (dm->state) {
 727                 case DM_BALLOON_UP:
 728                         balloon_up(dm, (struct dm_balloon *)recv_buffer);
 729                         break;
 730
 731                 case DM_HOT_ADD:
 732                         hot_add_req(dm, (struct dm_hot_add *)recv_buffer);
 733                         break;
 734                 default:
 735                         break;
 736                 }
 737
 738                 if (!time_in_range(jiffies, scan_start, scan_start + HZ))
 739                         post_status(dm);
 740
 741         }
 742
 743         return 0;
 744 }
 745
 746
 747 static void version_resp(struct hv_dynmem_device *dm,
 748                         struct dm_version_response *vresp)
 749 {
 750         struct dm_version_request version_req;
 751         int ret;
 752
 753         if (vresp->is_accepted) {
 754                 /*
 755                  * We are done; wakeup the
 756                  * context waiting for version
 757                  * negotiation.
 758                  */
 759                 complete(&dm->host_event);
 760                 return;
 761         }
 762         /*
 763          * If there are more versions to try, continue
 764          * with negotiations; if not
 765          * shutdown the service since we are not able
 766          * to negotiate a suitable version number
 767          * with the host.
 768          */
 769         if (dm->next_version == 0)
 770                 goto version_error;
 771
 772         dm->next_version = 0;
 773         memset(&version_req, 0, sizeof(struct dm_version_request));
 774         version_req.hdr.type = DM_VERSION_REQUEST;
 775         version_req.hdr.size = sizeof(struct dm_version_request);
 776         version_req.hdr.trans_id = atomic_inc_return(&trans_id);
 777         version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN7;
 778         version_req.is_last_attempt = 1;
 779
 780         ret = vmbus_sendpacket(dm->dev->channel, &version_req,
 781                                 sizeof(struct dm_version_request),
 782                                 (unsigned long)NULL,
 783                                 VM_PKT_DATA_INBAND, 0);
 784
 785         if (ret)
 786                 goto version_error;
 787
 788         return;
 789
 790 version_error:
 791         dm->state = DM_INIT_ERROR;
 792         complete(&dm->host_event);
 793 }
 794
 795 static void cap_resp(struct hv_dynmem_device *dm,
 796                         struct dm_capabilities_resp_msg *cap_resp)
 797 {
 798         if (!cap_resp->is_accepted) {
 799                 pr_info("Capabilities not accepted by host\n");
 800                 dm->state = DM_INIT_ERROR;
 801         }
 802         complete(&dm->host_event);
 803 }
 804
 805 static void balloon_onchannelcallback(void *context)
 806 {
 807         struct hv_device *dev = context;
 808         u32 recvlen;
 809         u64 requestid;
 810         struct dm_message *dm_msg;
 811         struct dm_header *dm_hdr;
 812         struct hv_dynmem_device *dm = hv_get_drvdata(dev);
 813
 814         memset(recv_buffer, 0, sizeof(recv_buffer));
 815         vmbus_recvpacket(dev->channel, recv_buffer,
 816                          PAGE_SIZE, &recvlen, &requestid);
 817
 818         if (recvlen > 0) {
 819                 dm_msg = (struct dm_message *)recv_buffer;
 820                 dm_hdr = &dm_msg->hdr;
 821
 822                 switch (dm_hdr->type) {
 823                 case DM_VERSION_RESPONSE:
 824                         version_resp(dm,
 825                                  (struct dm_version_response *)dm_msg);
 826                         break;
 827
 828                 case DM_CAPABILITIES_RESPONSE:
 829                         cap_resp(dm,
 830                                  (struct dm_capabilities_resp_msg *)dm_msg);
 831                         break;
 832
 833                 case DM_BALLOON_REQUEST:
 834                         dm->state = DM_BALLOON_UP;
 835                         complete(&dm->config_event);
 836                         break;
 837
 838                 case DM_UNBALLOON_REQUEST:
 839                         dm->state = DM_BALLOON_DOWN;
 840                         balloon_down(dm,
 841                                  (struct dm_unballoon_request *)recv_buffer);
 842                         break;
 843
 844                 case DM_MEM_HOT_ADD_REQUEST:
 845                         dm->state = DM_HOT_ADD;
 846                         complete(&dm->config_event);
 847                         break;
 848
 849                 case DM_INFO_MESSAGE:
 850                         process_info(dm, (struct dm_info_msg *)dm_msg);
 851                         break;
 852
 853                 default:
 854                         pr_err("Unhandled message: type: %d\n", dm_hdr->type);
 855
 856                 }
 857         }
 858
 859 }
 860
 861 static int balloon_probe(struct hv_device *dev,
 862                         const struct hv_vmbus_device_id *dev_id)
 863 {
 864         int ret, t;
 865         struct dm_version_request version_req;
 866         struct dm_capabilities cap_msg;
 867
 868         do_hot_add = hot_add;
 869
 870         /*
 871          * First allocate a send buffer.
 872          */
 873
 874         send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
 875         if (!send_buffer)
 876                 return -ENOMEM;
 877
 878         ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
 879                         balloon_onchannelcallback, dev);
 880
 881         if (ret)
 882                 return ret;
 883
 884         dm_device.dev = dev;
 885         dm_device.state = DM_INITIALIZING;
 886         dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
 887         init_completion(&dm_device.host_event);
 888         init_completion(&dm_device.config_event);
 889
 890         dm_device.thread =
 891                  kthread_run(dm_thread_func, &dm_device, "hv_balloon");
 892         if (IS_ERR(dm_device.thread)) {
 893                 ret = PTR_ERR(dm_device.thread);
 894                 goto probe_error0;
 895         }
 896
 897         hv_set_drvdata(dev, &dm_device);
 898         /*
 899          * Initiate the hand shake with the host and negotiate
 900          * a version that the host can support. We start with the
 901          * highest version number and go down if the host cannot
 902          * support it.
 903          */
 904         memset(&version_req, 0, sizeof(struct dm_version_request));
 905         version_req.hdr.type = DM_VERSION_REQUEST;
 906         version_req.hdr.size = sizeof(struct dm_version_request);
 907         version_req.hdr.trans_id = atomic_inc_return(&trans_id);
 908         version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN8;
 909         version_req.is_last_attempt = 0;
 910
 911         ret = vmbus_sendpacket(dev->channel, &version_req,
 912                                 sizeof(struct dm_version_request),
 913                                 (unsigned long)NULL,
 914                                 VM_PKT_DATA_INBAND,
 915                                 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 916         if (ret)
 917                 goto probe_error1;
 918
 919         t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
 920         if (t == 0) {
 921                 ret = -ETIMEDOUT;
 922                 goto probe_error1;
 923         }
 924
 925         /*
 926          * If we could not negotiate a compatible version with the host
 927          * fail the probe function.
 928          */
 929         if (dm_device.state == DM_INIT_ERROR) {
 930                 ret = -ETIMEDOUT;
 931                 goto probe_error1;
 932         }
 933         /*
 934          * Now submit our capabilities to the host.
 935          */
 936         memset(&cap_msg, 0, sizeof(struct dm_capabilities));
 937         cap_msg.hdr.type = DM_CAPABILITIES_REPORT;
 938         cap_msg.hdr.size = sizeof(struct dm_capabilities);
 939         cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
 940
 941         cap_msg.caps.cap_bits.balloon = 1;
 942         /*
 943          * While we currently don't support hot-add,
 944          * we still advertise this capability since the
 945          * host requires that guests partcipating in the
 946          * dynamic memory protocol support hot add.
 947          */
 948         cap_msg.caps.cap_bits.hot_add = 1;
 949
 950         /*
 951          * Currently the host does not use these
 952          * values and we set them to what is done in the
 953          * Windows driver.
 954          */
 955         cap_msg.min_page_cnt = 0;
 956         cap_msg.max_page_number = -1;
 957
 958         ret = vmbus_sendpacket(dev->channel, &cap_msg,
 959                                 sizeof(struct dm_capabilities),
 960                                 (unsigned long)NULL,
 961                                 VM_PKT_DATA_INBAND,
 962                                 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 963         if (ret)
 964                 goto probe_error1;
 965
 966         t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
 967         if (t == 0) {
 968                 ret = -ETIMEDOUT;
 969                 goto probe_error1;
 970         }
 971
 972         /*
 973          * If the host does not like our capabilities,
 974          * fail the probe function.
 975          */
 976         if (dm_device.state == DM_INIT_ERROR) {
 977                 ret = -ETIMEDOUT;
 978                 goto probe_error1;
 979         }
 980
 981         dm_device.state = DM_INITIALIZED;
 982
 983         return 0;
 984
 985 probe_error1:
 986         kthread_stop(dm_device.thread);
 987
 988 probe_error0:
 989         vmbus_close(dev->channel);
 990         return ret;
 991 }
 992
 993 static int balloon_remove(struct hv_device *dev)
 994 {
 995         struct hv_dynmem_device *dm = hv_get_drvdata(dev);
 996
 997         if (dm->num_pages_ballooned != 0)
 998                 pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
 999
1000         vmbus_close(dev->channel);
1001         kthread_stop(dm->thread);
1002
1003         return 0;
1004 }
1005
1006 static const struct hv_vmbus_device_id id_table[] = {
1007         /* Dynamic Memory Class ID */
1008         /* 525074DC-8985-46e2-8057-A307DC18A502 */
1009         { VMBUS_DEVICE(0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46,
1010                        0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02)
1011         },
1012         { },
1013 };
1014
1015 MODULE_DEVICE_TABLE(vmbus, id_table);
1016
1017 static  struct hv_driver balloon_drv = {
1018         .name = "hv_balloon",
1019         .id_table = id_table,
1020         .probe =  balloon_probe,
1021         .remove =  balloon_remove,
1022 };
1023
1024 static int __init init_balloon_drv(void)
1025 {
1026
1027         return vmbus_driver_register(&balloon_drv);
1028 }
1029
1030 static void exit_balloon_drv(void)
1031 {
1032
1033         vmbus_driver_unregister(&balloon_drv);
1034 }
1035
1036 module_init(init_balloon_drv);
1037 module_exit(exit_balloon_drv);
1038
1039 MODULE_DESCRIPTION("Hyper-V Balloon");
1040 MODULE_VERSION(HV_DRV_VERSION);
1041 MODULE_LICENSE("GPL");