/* $NetBSD$ */ /*- * Copyright (c) 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Cherry G. Mathew * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * TODO: xen.balloon xen.balloon.current: DONE xen.balloon.target: IN PROGRESS xen.balloon.low-balloon xen.balloon.high-balloon xen.balloon.limit sysctl labels = { 'current' : 'Current allocation', 'target' : 'Requested target', 'low-balloon' : 'Low-mem balloon', 'high-balloon' : 'High-mem balloon', 'limit' : 'Xen hard limit' } */ #include __KERNEL_RCSID(0, "$NetBSD$"); #include "opt_balloon.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define BALLOONINTERVALMS 100 /* milliseconds */ /* XXX: fix limits */ #define LOW_BALLOON 100 /* In pages */ #define HIGH_BALLOON SIZE_T_MAX /* In pages */ /* Forward declaration */ static void xenbus_balloon_watcher(struct xenbus_watch *watch, const char **vec, unsigned int len); struct balloon_page_entry { struct vm_page *pg; SLIST_ENTRY(balloon_page_entry) entry; }; static struct balloon_conf { kmutex_t flaglock; /* Protects condvar (below) */ kcondvar_t cv_memchanged; /* Notifier flag for target (below) */ kmutex_t tgtlock; /* Spin lock, protects .target, below */ size_t target; /* Target balloon size, in pages. */ SLIST_HEAD(, balloon_page_entry) balloon_page_entries; } balloon_conf; static struct xenbus_watch xenbus_balloon_watch = { .node = __UNCONST("memory/target"), .xbw_callback = xenbus_balloon_watcher, }; /* Returns zero, on error */ static size_t xenmem_get_maxreservation(void) { int ret; ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, & (domid_t) { DOMID_SELF }); if (ret < 0) { /* XXX: panic() ? */ return 0; } return ret; } /* Returns zero, on error */ static size_t xenmem_get_currentreservation(void) { int ret; ret = HYPERVISOR_memory_op(XENMEM_current_reservation, & (domid_t) { DOMID_SELF }); if (ret < 0) { /* XXX: panic() ? */ return 0; } return ret; } static size_t balloon_get_target(void) { size_t target; mutex_spin_enter(&balloon_conf.tgtlock); target = balloon_conf.target; mutex_spin_exit(&balloon_conf.tgtlock); return target; } static void balloon_set_target(size_t target) { mutex_spin_enter(&balloon_conf.tgtlock); balloon_conf.target = target; mutex_spin_exit(&balloon_conf.tgtlock); return; } static size_t reserve_pages(size_t npages, xen_pfn_t *mfn_list) { struct balloon_page_entry *bpg_entry; size_t newpgcount; paddr_t pfn; for (newpgcount = 0; newpgcount < npages; newpgcount++) { struct vm_page *pg; pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); if (pg == NULL) { break; } pfn = x86_btop(VM_PAGE_TO_PHYS(pg) - XPMAP_OFFSET); mfn_list[newpgcount] = pfn_to_mfn(pfn); /* Invalidate pg */ xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; /* Save mfn */ /* * XXX: We don't keep a copy, but just save a pointer * to the uvm pg handle. Is this ok ? */ bpg_entry = kmem_alloc(sizeof *bpg_entry, KM_SLEEP); if (bpg_entry == NULL) { uvm_pagefree(pg); break; } bpg_entry->pg = pg; SLIST_INSERT_HEAD(&balloon_conf.balloon_page_entries, bpg_entry, entry); } return newpgcount; } static size_t unreserve_pages(size_t ret, xen_pfn_t *mfn_list) { size_t npages; for (npages = 0; npages < ret; npages++) { struct balloon_page_entry *bpg_entry; struct vm_page *pg; paddr_t pfn; int tmp; if (SLIST_EMPTY(&balloon_conf.balloon_page_entries)) { /*XXX: This is the case where extra mem w.r.t boot comes in ? */ printf("Balloon is empty. can't be collapsed further!"); /*XXX: mark down target ? */ break; } bpg_entry = SLIST_FIRST(&balloon_conf.balloon_page_entries); SLIST_REMOVE_HEAD(&balloon_conf.balloon_page_entries, entry); pg = bpg_entry->pg; kmem_free(bpg_entry, sizeof *bpg_entry); /* Update P->M */ pfn = x86_btop(VM_PAGE_TO_PHYS(pg) - XPMAP_OFFSET); xpmap_phys_to_machine_mapping[pfn] = mfn_list[npages]; /* Update the MMU */ mmu_update_t mmu; mmu.ptr = x86_ptob(mfn_list[npages]) | MMU_MACHPHYS_UPDATE; mmu.val = pfn; if (HYPERVISOR_mmu_update(&mmu, 1, &tmp, DOMID_SELF) < 0) { panic("MMU Update failed!"); } /* Free it to UVM */ uvm_pagefree(pg); } return npages; } static void balloon_inflate(size_t npages) { int ret; size_t respgcnt; xen_pfn_t *mfn_list; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; /* * There's a risk that npages might overflow ret. * Do this is smaller steps then. * See: HYPERVISOR_memory_op(...) below.... */ if (npages > INT_MAX) { npages = INT_MAX; } mfn_list = kmem_alloc(npages * sizeof *mfn_list, KM_SLEEP); if (mfn_list == NULL) { printf("%s: Error, could not allocate kernel memory", __FILE__); return; } respgcnt = reserve_pages(npages, mfn_list); /* Hand over pages to Hypervisor */ xenguest_handle(reservation.extent_start) = mfn_list; reservation.nr_extents = respgcnt; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); if (ret < 0) { /* Unroll loop and release page frames back to the OS. */ unreserve_pages(respgcnt, mfn_list); return; } KASSERT(ret == npages); kmem_free(mfn_list, npages * sizeof *mfn_list); printf("inflated by %d\n", ret); return; } static void balloon_deflate(size_t npages) { int ret; size_t pgmax; xen_pfn_t *mfn_list; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; /* * There's a risk that npages might overflow ret. * Do this is smaller steps then. * See: HYPERVISOR_memory_op(...) below.... */ if (npages > INT_MAX) { npages = INT_MAX; } /* XXX: This is wrong. npages is the _delta_. * Trim npages, if its exceeded the hard limit */ if ((pgmax = xenmem_get_maxreservation()) > 0) { pgmax -= xenmem_get_currentreservation(); } if (npages > pgmax && pgmax > 0) { npages = pgmax; } mfn_list = kmem_alloc(npages * sizeof *mfn_list, KM_SLEEP); if (mfn_list == NULL) { printf("%s: Error, could not allocate kernel memory", __FILE__); return; } xenguest_handle(reservation.extent_start) = mfn_list; reservation.nr_extents = npages; ret = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation); if (ret <= 0) { panic("Increase reservation failed"); /* NOTREACHED */ return; } npages = unreserve_pages(ret, mfn_list); printf("deflated by %zu\n", npages); return; } static size_t balloon_resize(size_t targetpages) { size_t currentpages; /* Get current number of pages */ currentpages = xenmem_get_currentreservation(); KASSERT(currentpages > 0); if (targetpages == currentpages) { return currentpages; } #if 0 printf("Current pages == %zu\n", currentpages); #endif /* Increase or decrease, accordingly */ if (targetpages > currentpages) { balloon_deflate(targetpages - currentpages); } else { balloon_inflate(currentpages - targetpages); } /* Get the new, adjusted number of pages. */ currentpages = xenmem_get_currentreservation(); KASSERT(currentpages > 0); yield(); return currentpages; } static void balloon_thread(void *ignore) { size_t targetinprogress; const int pollticks = mstohz(BALLOONINTERVALMS); /* * Get target. This will ensure that the wait loop (below) * won't break out until the target is set properly for the * first time. The value of targetinprogress is probably * rubbish. */ targetinprogress = balloon_get_target(); for/*ever*/ ( ;; ) { size_t tgtcache; mutex_enter(&balloon_conf.flaglock); while (balloon_get_target() == targetinprogress) { cv_timedwait(&balloon_conf.cv_memchanged, &balloon_conf.flaglock, pollticks); } tgtcache = balloon_get_target(); #if 0 printf("new target ==> %zu\n", tgtcache); #endif targetinprogress = balloon_resize(tgtcache); mutex_exit(&balloon_conf.flaglock); } } static size_t xenbus_balloon_read_target(void) { unsigned long long new_target; if (0 != xenbus_read_ull(NULL, "memory", "target", &new_target, 0)) { printf("error, couldn't read\n"); return 0; } /* Convert to npages */ return new_target * 1024 / PAGE_SIZE; } static void xenbus_balloon_watcher(struct xenbus_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; if (0 == (new_target = xenbus_balloon_read_target())) { /* Don't update target value */ return; } balloon_set_target(new_target); printf("Setting target to %llu\n", new_target); printf("Current reservation is %zu\n", xenmem_get_currentreservation()); /* Notify balloon thread, if we can. */ if (mutex_tryenter(&balloon_conf.flaglock)) { cv_signal(&balloon_conf.cv_memchanged); mutex_exit(&balloon_conf.flaglock); } return; } void balloon_xenbus_setup(void) { #ifdef XEN_BALLOON /* Setup flaglocks, condvars et. al */ mutex_init(&balloon_conf.flaglock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&balloon_conf.tgtlock, MUTEX_DEFAULT, IPL_HIGH); cv_init(&balloon_conf.cv_memchanged, "ballooning"); SLIST_INIT(&balloon_conf.balloon_page_entries); /* Setup xenbus node watch callback */ if (register_xenbus_watch(&xenbus_balloon_watch)) { aprint_error("%s: unable to watch memory/target\n", __func__); cv_destroy(&balloon_conf.cv_memchanged); mutex_destroy(&balloon_conf.tgtlock); mutex_destroy(&balloon_conf.flaglock); } /* Setup kernel thread to asynchronously (in/de)-flate the balloon */ if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, balloon_thread, NULL /* arg */, NULL, "balloon")) { aprint_error("%s: unable to create balloon thread\n", __func__); unregister_xenbus_watch(&xenbus_balloon_watch); cv_destroy(&balloon_conf.cv_memchanged); mutex_destroy(&balloon_conf.tgtlock); mutex_destroy(&balloon_conf.flaglock); } #endif return; } /* * sysctl(9) stuff */ /* sysctl helper routine */ static int sysctl_kern_xen_balloon(SYSCTLFN_ARGS) { struct sysctlnode node; /* * Assumes SIZE_T_MAX <= ((uint64_t) -1) see createv() in * SYSCTL_SETUP(...) below */ int error; int64_t node_val; int64_t newnode_val; KASSERT(rnode != NULL); node = *rnode; if (strcmp(node.sysctl_name, "current") == 0) { node_val = xenmem_get_currentreservation(); KASSERT(node_val < SIZE_T_MAX); node.sysctl_data = &node_val; return sysctl_lookup(SYSCTLFN_CALL(&node)); } else if (strcmp(node.sysctl_name, "target") == 0) { newnode_val = node_val = balloon_get_target(); node.sysctl_data = &newnode_val; error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error != 0) { return error; } /* Sanity check new size */ /* if (newnode_val <= LOW_BALLOON */ /* || newnode_val > HIGH_BALLOON) { */ /* return EINVAL; */ /* } */ KASSERT(node_val < SIZE_T_MAX); if (node_val != newnode_val) { // * (int64_t *) rnode->sysctl_data = newnode_val; printf("setting to %qd", newnode_val); balloon_set_target(newnode_val); } return 0; } return EINVAL; } /* Setup nodes. */ SYSCTL_SETUP(sysctl_kern_xen_balloon_setup, "sysctl kern.xen.balloon setup") { const struct sysctlnode *node = NULL; sysctl_createv(clog, 0, NULL, &node, CTLFLAG_PERMANENT, CTLTYPE_NODE, "kern", NULL, NULL, 0, NULL, 0, CTL_KERN, CTL_EOL); /* XXX: if (node != NULL) */ sysctl_createv(clog, 0, &node, &node, CTLFLAG_PERMANENT, CTLTYPE_NODE, "xen", SYSCTL_DESCR("Xen"), NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, &node, &node, CTLFLAG_PERMANENT, CTLTYPE_NODE, "balloon", SYSCTL_DESCR("Balloon"), NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, &node, NULL, CTLFLAG_PERMANENT, CTLTYPE_QUAD, "current", SYSCTL_DESCR("Current balloon size"), sysctl_kern_xen_balloon, 0, NULL, 0, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, &node, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_QUAD, "target", SYSCTL_DESCR("Target balloon size"), sysctl_kern_xen_balloon, 0, NULL, 0, CTL_CREATE, CTL_EOL); }