drivers/staging/zcache/ramster/ramster.c
/*
 * ramster.c
 *
 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
 *
 * RAMster implements peer-to-peer transcendent memory, allowing a "cluster" of
 * kernels to dynamically pool their RAM so that a RAM-hungry workload on one
 * machine can temporarily and transparently utilize RAM on another machine
 * which is presumably idle or running a non-RAM-hungry workload.
 *
 * RAMster combines a clustering and messaging foundation based on the ocfs2
 * cluster layer with the in-kernel compression implementation of zcache, and
 * adds code to glue them together.  When a page is "put" to RAMster, it is
 * compressed and stored locally.  Periodically, a thread will "remotify" these
 * pages by sending them via messages to a remote machine.  When the page is
 * later needed as indicated by a page fault, a "get" is issued.  If the data
 * is local, it is uncompressed and the fault is resolved.  If the data is
 * remote, a message is sent to fetch the data and the faulting thread sleeps;
 * when the data arrives, the thread awakens, the data is decompressed and
 * the fault is resolved.
 *
 * As of V5, clusters up to eight nodes are supported; each node can remotify
 * pages to one specified node, so clusters can be configured as clients to
 * a "memory server".  Some simple policy is in place that will need to be
 * refined over time.  Larger clusters and fault-resistant protocols can also
 * be added over time.
 */

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/lzo.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include "../tmem.h"
#include "../zcache.h"
#include "../zbud.h"
#include "ramster.h"
#include "ramster_nodemanager.h"
#include "tcp.h"
#include "debug.h"

#define RAMSTER_TESTING

#ifndef CONFIG_SYSFS
#error "ramster needs sysfs to define cluster nodes to use"
#endif

static bool use_cleancache __read_mostly;
static bool use_frontswap __read_mostly;
static bool use_frontswap_exclusive_gets __read_mostly;

/* These must be sysfs not debugfs as they are checked/used by userland!! */
static unsigned long ramster_interface_revision __read_mostly =
        R2NM_API_VERSION; /* interface revision must match userspace! */
static unsigned long ramster_pers_remotify_enable __read_mostly;
static unsigned long ramster_eph_remotify_enable __read_mostly;
static atomic_t ramster_remote_pers_pages = ATOMIC_INIT(0);
#define MANUAL_NODES 8
static bool ramster_nodes_manual_up[MANUAL_NODES] __read_mostly;
static int ramster_remote_target_nodenum __read_mostly = -1;

/* Used by this code. */
long ramster_flnodes;
ssize_t ramster_foreign_eph_pages;
ssize_t ramster_foreign_pers_pages;
/* FIXME frontswap selfshrinking knobs in debugfs? */

static LIST_HEAD(ramster_rem_op_list);
static DEFINE_SPINLOCK(ramster_rem_op_list_lock);
static DEFINE_PER_CPU(struct ramster_preload, ramster_preloads);

static DEFINE_PER_CPU(unsigned char *, ramster_remoteputmem1);
static DEFINE_PER_CPU(unsigned char *, ramster_remoteputmem2);

static struct kmem_cache *ramster_flnode_cache __read_mostly;
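
/*
 * Hand out the flushlist_node that was preloaded for this cpu by
 * ramster_do_preload_flnode(); the caller must have preloaded one.
 */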
static struct flushlist_node *ramster_flnode_alloc(struct tmem_pool *pool)
{
        struct flushlist_node *flnode = NULL;
        struct ramster_preload *kp;

        kp = &__get_cpu_var(ramster_preloads);
        flnode = kp->flnode;
        BUG_ON(flnode == NULL);
        kp->flnode = NULL;
        inc_ramster_flnodes();
        return flnode;
}

/* the "flush list" asynchronously collects pages to remotely flush */
#define FLUSH_ENTIRE_OBJECT ((uint32_t)-1)
static void ramster_flnode_free(struct flushlist_node *flnode,
                                struct tmem_pool *pool)
{
        dec_ramster_flnodes();
        BUG_ON(ramster_flnodes < 0);
        kmem_cache_free(ramster_flnode_cache, flnode);
}

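/*
 * Ensure this cpu has a flushlist_node preloaded so that a later
 * ramster_flnode_alloc() cannot fail while tmem locks are held.
 * Must be called with interrupts disabled.
 */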
int ramster_do_preload_flnode(struct tmem_pool *pool)
{
        struct ramster_preload *kp;
        struct flushlist_node *flnode;
        int ret = -ENOMEM;

        BUG_ON(!irqs_disabled());
        if (unlikely(ramster_flnode_cache == NULL))
                BUG();
        kp = &__get_cpu_var(ramster_preloads);
        flnode = kmem_cache_alloc(ramster_flnode_cache, GFP_ATOMIC);
        if (unlikely(flnode == NULL) && kp->flnode == NULL)
                BUG();  /* FIXME handle more gracefully, but how??? */
        else if (kp->flnode == NULL)
                kp->flnode = flnode;
        else if (flnode != NULL)
                kmem_cache_free(ramster_flnode_cache, flnode);
        /* this cpu now has a preloaded flnode, so report success */
        ret = 0;
        return ret;
}
EXPORT_SYMBOL_GPL(ramster_do_preload_flnode);

/*
 * Called by the message handler after a (still compressed) page has been
 * fetched from the remote machine in response to an "is_remote" tmem_get
 * or persistent tmem_localify.  For a tmem_get, "extra" is the address of
 * the page that is to be filled to successfully resolve the tmem_get; for
 * a (persistent) tmem_localify, "extra" is NULL (as the data is placed only
 * in the local zcache).  "data" points to "size" bytes of (compressed) data
 * passed in the message.  In the case of a persistent remote get, if
 * pre-allocation was successful (see ramster_repatriate_preload), the page
 * is placed into both local zcache and at "extra".
 */
int ramster_localify(int pool_id, struct tmem_oid *oidp, uint32_t index,
                        char *data, unsigned int size, void *extra)
{
        int ret = -ENOENT;
        unsigned long flags;
        struct tmem_pool *pool;
        bool eph, delete = false;
        void *pampd, *saved_hb;
        struct tmem_obj *obj;

        pool = zcache_get_pool_by_id(LOCAL_CLIENT, pool_id);
        if (unlikely(pool == NULL))
                /* pool doesn't exist anymore */
                goto out;
        eph = is_ephemeral(pool);
        local_irq_save(flags);  /* FIXME: maybe only disable softirqs? */
        pampd = tmem_localify_get_pampd(pool, oidp, index, &obj, &saved_hb);
        if (pampd == NULL) {
                /* hmmm... must have been a flush while waiting */
#ifdef RAMSTER_TESTING
                pr_err("UNTESTED pampd==NULL in ramster_localify\n");
#endif
                if (eph)
                        inc_ramster_remote_eph_pages_unsucc_get();
                else
                        inc_ramster_remote_pers_pages_unsucc_get();
                obj = NULL;
                goto finish;
        } else if (unlikely(!pampd_is_remote(pampd))) {
                /* hmmm... must have been a dup put while waiting */
#ifdef RAMSTER_TESTING
                pr_err("UNTESTED dup while waiting in ramster_localify\n");
#endif
                if (eph)
                        inc_ramster_remote_eph_pages_unsucc_get();
                else
                        inc_ramster_remote_pers_pages_unsucc_get();
                obj = NULL;
                pampd = NULL;
                ret = -EEXIST;
                goto finish;
        } else if (size == 0) {
                /* no remote data, delete the local is_remote pampd */
                pampd = NULL;
                if (eph)
                        inc_ramster_remote_eph_pages_unsucc_get();
                else
                        BUG();
                delete = true;
                goto finish;
        }
        if (pampd_is_intransit(pampd)) {
                /*
                 *  a pampd is marked intransit if it is remote and space has
                 *  been allocated for it locally (note, only happens for
                 *  persistent pages, in which case the remote copy is freed)
                 */
                BUG_ON(eph);
                pampd = pampd_mask_intransit_and_remote(pampd);
                zbud_copy_to_zbud(pampd, data, size);
        } else {
                /*
                 * setting pampd to NULL tells tmem_localify_finish to leave
                 * pampd alone... meaning it is left pointing to the
                 * remote copy
                 */
                pampd = NULL;
                obj = NULL;
        }
        /*
         * but in all cases, we decompress direct-to-memory to complete
         * the remotify and return success
         */
        BUG_ON(extra == NULL);
        zcache_decompress_to_page(data, size, (struct page *)extra);
        if (eph)
                inc_ramster_remote_eph_pages_succ_get();
        else
                inc_ramster_remote_pers_pages_succ_get();
        ret = 0;
finish:
        tmem_localify_finish(obj, index, pampd, saved_hb, delete);
        zcache_put_pool(pool);
        local_irq_restore(flags);
out:
        return ret;
}

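/*
 * tmem pamops callbacks: a new tmem_obj starts with no remote ("extra")
 * data; when an obj with remote data is freed, a flush of the entire
 * object is queued for asynchronous delivery to the remote node.
 */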
void ramster_pampd_new_obj(struct tmem_obj *obj)
{
        obj->extra = NULL;
}

void ramster_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj,
                                bool pool_destroy)
{
        struct flushlist_node *flnode;

        BUG_ON(preemptible());
        if (obj->extra == NULL)
                return;
        if (pool_destroy && is_ephemeral(pool))
                /* FIXME don't bother with remote eph data for now */
                return;
        BUG_ON(!pampd_is_remote(obj->extra));
        flnode = ramster_flnode_alloc(pool);
        flnode->xh.client_id = pampd_remote_node(obj->extra);
        flnode->xh.pool_id = pool->pool_id;
        flnode->xh.oid = obj->oid;
        flnode->xh.index = FLUSH_ENTIRE_OBJECT;
        flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_OBJ;
        spin_lock(&ramster_rem_op_list_lock);
        list_add(&flnode->rem_op.list, &ramster_rem_op_list);
        spin_unlock(&ramster_rem_op_list_lock);
}

/*
 * Called on a remote persistent tmem_get to attempt to preallocate
 * local storage for the data contained in the remote persistent page.
 * If successfully preallocated, returns the pampd, marked as remote and
 * in_transit.  Else returns NULL.  Note that the appropriate tmem data
 * structure must be locked.
 */
void *ramster_pampd_repatriate_preload(void *pampd, struct tmem_pool *pool,
                                        struct tmem_oid *oidp, uint32_t index,
                                        bool *intransit)
{
        int clen = pampd_remote_size(pampd), c;
        void *ret_pampd = NULL;
        unsigned long flags;
        struct tmem_handle th;

        BUG_ON(!pampd_is_remote(pampd));
        BUG_ON(is_ephemeral(pool));
        if (use_frontswap_exclusive_gets)
                /* don't need local storage */
                goto out;
        if (pampd_is_intransit(pampd)) {
                /*
                 * to avoid multiple allocations (and maybe a memory leak)
                 * don't preallocate if already in the process of being
                 * repatriated
                 */
                *intransit = true;
                goto out;
        }
        *intransit = false;
        local_irq_save(flags);
        th.client_id = pampd_remote_node(pampd);
        th.pool_id = pool->pool_id;
        th.oid = *oidp;
        th.index = index;
        ret_pampd = zcache_pampd_create(NULL, clen, true, false, &th);
        if (ret_pampd != NULL) {
                /*
                 *  a pampd is marked intransit if it is remote and space has
                 *  been allocated for it locally (note, only happens for
                 *  persistent pages, in which case the remote copy is freed)
                 */
                ret_pampd = pampd_mark_intransit(ret_pampd);
                c = atomic_dec_return(&ramster_remote_pers_pages);
                WARN_ON_ONCE(c < 0);
        } else {
                inc_ramster_pers_pages_remote_nomem();
        }
        local_irq_restore(flags);
out:
        return ret_pampd;
}

/*
 * Called on a remote tmem_get to invoke a message to fetch the page.
 * Might sleep so no tmem locks can be held.  "extra" is passed
 * all the way through the round-trip messaging to ramster_localify.
 */
int ramster_pampd_repatriate(void *fake_pampd, void *real_pampd,
                                struct tmem_pool *pool,
                                struct tmem_oid *oid, uint32_t index,
                                bool free, void *extra)
{
        struct tmem_xhandle xh;
        int ret;

        if (pampd_is_intransit(real_pampd))
                /* have local space pre-reserved, so free remote copy */
                free = true;
        xh = tmem_xhandle_fill(LOCAL_CLIENT, pool, oid, index);
        /* unreliable request/response for now */
        ret = r2net_remote_async_get(&xh, free,
                                        pampd_remote_node(fake_pampd),
                                        pampd_remote_size(fake_pampd),
                                        pampd_remote_cksum(fake_pampd),
                                        extra);
        return ret;
}

bool ramster_pampd_is_remote(void *pampd)
{
        return pampd_is_remote(pampd);
}

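/*
 * Record in obj->extra that this object now has remote data; all remote
 * pages belonging to a single object must reside on the same node.
 */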
int ramster_pampd_replace_in_obj(void *new_pampd, struct tmem_obj *obj)
{
        int ret = -1;

        if (new_pampd != NULL) {
                if (obj->extra == NULL)
                        obj->extra = new_pampd;
                /* enforce that all remote pages in an object reside
                 * in the same node! */
                else if (pampd_remote_node(new_pampd) !=
                                pampd_remote_node((void *)(obj->extra)))
                        BUG();
                ret = 0;
        }
        return ret;
}

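/*
 * Free the local bookkeeping for a remote pampd.  A persistent page that
 * is being repatriated (intransit) just has its local copy returned for
 * freeing; otherwise a remote flush of the page is queued.  A NULL oid
 * means the remote free is handled elsewhere.
 */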
void *ramster_pampd_free(void *pampd, struct tmem_pool *pool,
                              struct tmem_oid *oid, uint32_t index, bool acct)
{
        bool eph = is_ephemeral(pool);
        void *local_pampd = NULL;
        int c;

        BUG_ON(preemptible());
        BUG_ON(!pampd_is_remote(pampd));
        WARN_ON(acct == false);
        if (oid == NULL) {
                /*
                 * a NULL oid means to ignore this pampd free
                 * as the remote freeing will be handled elsewhere
                 */
        } else if (eph) {
                /* FIXME remote flush optional but probably good idea */
        } else if (pampd_is_intransit(pampd)) {
                /* did a pers remote get_and_free, so just free local */
                local_pampd = pampd_mask_intransit_and_remote(pampd);
        } else {
                struct flushlist_node *flnode =
                        ramster_flnode_alloc(pool);

                flnode->xh.client_id = pampd_remote_node(pampd);
                flnode->xh.pool_id = pool->pool_id;
                flnode->xh.oid = *oid;
                flnode->xh.index = index;
                flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_PAGE;
                spin_lock(&ramster_rem_op_list_lock);
                list_add(&flnode->rem_op.list, &ramster_rem_op_list);
                spin_unlock(&ramster_rem_op_list_lock);
                c = atomic_dec_return(&ramster_remote_pers_pages);
                WARN_ON_ONCE(c < 0);
        }
        return local_pampd;
}
EXPORT_SYMBOL_GPL(ramster_pampd_free);

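/*
 * Adjust the count of "foreign" pages, i.e. pages stored locally on
 * behalf of a remote node; count must be +1 or -1.
 */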
void ramster_count_foreign_pages(bool eph, int count)
{
        BUG_ON(count != 1 && count != -1);
        if (eph) {
                if (count > 0) {
                        inc_ramster_foreign_eph_pages();
                } else {
                        dec_ramster_foreign_eph_pages();
                        WARN_ON_ONCE(ramster_foreign_eph_pages < 0);
                }
        } else {
                if (count > 0) {
                        inc_ramster_foreign_pers_pages();
                } else {
                        dec_ramster_foreign_pers_pages();
                        WARN_ON_ONCE(ramster_foreign_pers_pages < 0);
                }
        }
}
EXPORT_SYMBOL_GPL(ramster_count_foreign_pages);

/*
 * For now, just push over a few pages every few seconds to
 * ensure that it basically works
 */
static struct workqueue_struct *ramster_remotify_workqueue;
static void ramster_remotify_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(ramster_remotify_worker,
                ramster_remotify_process);

static void ramster_remotify_queue_delayed_work(unsigned long delay)
{
        if (!queue_delayed_work(ramster_remotify_workqueue,
                                &ramster_remotify_worker, delay))
                pr_err("ramster_remotify: bad workqueue\n");
}

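/*
 * Send a flush-page (or flush-object) message to the remote node named
 * in the flushlist_node, count the result, then free the node.
 */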
static void ramster_remote_flush_page(struct flushlist_node *flnode)
{
        struct tmem_xhandle *xh;
        int remotenode, ret;

        preempt_disable();
        xh = &flnode->xh;
        remotenode = flnode->xh.client_id;
        ret = r2net_remote_flush(xh, remotenode);
        if (ret >= 0)
                inc_ramster_remote_pages_flushed();
        else
                inc_ramster_remote_page_flushes_failed();
        preempt_enable_no_resched();
        ramster_flnode_free(flnode, NULL);
}

static void ramster_remote_flush_object(struct flushlist_node *flnode)
{
        struct tmem_xhandle *xh;
        int remotenode, ret;

        preempt_disable();
        xh = &flnode->xh;
        remotenode = flnode->xh.client_id;
        ret = r2net_remote_flush_object(xh, remotenode);
        if (ret >= 0)
                inc_ramster_remote_objects_flushed();
        else
                inc_ramster_remote_object_flushes_failed();
        preempt_enable_no_resched();
        ramster_flnode_free(flnode, NULL);
}

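/*
 * Pull up to two "zombie" zbud pages (ephemeral or persistent, per "eph")
 * off the local LRU, send them to the remote target node, and on success
 * replace each local pampd with one recording the remote location.
 * Returns the number of zbuds processed.
 */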
int ramster_remotify_pageframe(bool eph)
{
        struct tmem_xhandle xh;
        unsigned int size;
        int remotenode, ret, zbuds;
        struct tmem_pool *pool;
        unsigned long flags;
        unsigned char cksum;
        char *p;
        int i, j;
        unsigned char *tmpmem[2];
        struct tmem_handle th[2];
        unsigned int zsize[2];

        tmpmem[0] = __get_cpu_var(ramster_remoteputmem1);
        tmpmem[1] = __get_cpu_var(ramster_remoteputmem2);
        local_bh_disable();
        zbuds = zbud_make_zombie_lru(&th[0], &tmpmem[0], &zsize[0], eph);
        /* now OK to release lock set in caller */
        local_bh_enable();
        if (zbuds == 0)
                goto out;
        BUG_ON(zbuds > 2);
        for (i = 0; i < zbuds; i++) {
                xh.client_id = th[i].client_id;
                xh.pool_id = th[i].pool_id;
                xh.oid = th[i].oid;
                xh.index = th[i].index;
                size = zsize[i];
                BUG_ON(size == 0 || size > zbud_max_buddy_size());
                for (p = tmpmem[i], cksum = 0, j = 0; j < size; j++)
                        cksum += *p++;
                ret = r2net_remote_put(&xh, tmpmem[i], size, eph, &remotenode);
                if (ret != 0) {
                /*
                 * This is some form of a memory leak... if the remote put
                 * fails, there will never be another attempt to remotify
                 * this page.  But since we've dropped the zv pointer,
                 * the page may have been freed or the data replaced
                 * so we can't just "put it back" in the remote op list.
                 * Even if we could, not sure where to put it in the list
                 * because there may be flushes that must be strictly
                 * ordered vs the put.  So leave this as a FIXME for now.
                 * But count them so we know if it becomes a problem.
                 */
                        if (eph)
                                inc_ramster_eph_pages_remote_failed();
                        else
                                inc_ramster_pers_pages_remote_failed();
                        break;
                } else {
                        if (!eph)
                                atomic_inc(&ramster_remote_pers_pages);
                }
                if (eph)
                        inc_ramster_eph_pages_remoted();
                else
                        inc_ramster_pers_pages_remoted();
                /*
                 * data was successfully remoted so change the local version to
                 * point to the remote node where it landed
                 */
                local_bh_disable();
                pool = zcache_get_pool_by_id(LOCAL_CLIENT, xh.pool_id);
                local_irq_save(flags);
                (void)tmem_replace(pool, &xh.oid, xh.index,
                                pampd_make_remote(remotenode, size, cksum));
                local_irq_restore(flags);
                zcache_put_pool(pool);
                local_bh_enable();
        }
out:
        return zbuds;
}

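/*
 * Drain the remote-op list, issuing the queued remote page and object
 * flushes in the order they were queued.
 */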
static void zcache_do_remotify_flushes(void)
{
        struct ramster_remotify_hdr *rem_op;
        union remotify_list_node *u;

        while (1) {
                spin_lock(&ramster_rem_op_list_lock);
                if (list_empty(&ramster_rem_op_list)) {
                        spin_unlock(&ramster_rem_op_list_lock);
                        goto out;
                }
                rem_op = list_first_entry(&ramster_rem_op_list,
                                struct ramster_remotify_hdr, list);
                list_del_init(&rem_op->list);
                spin_unlock(&ramster_rem_op_list_lock);
                u = (union remotify_list_node *)rem_op;
                switch (rem_op->op) {
                case RAMSTER_REMOTIFY_FLUSH_PAGE:
                        ramster_remote_flush_page((struct flushlist_node *)u);
                        break;
                case RAMSTER_REMOTIFY_FLUSH_OBJ:
                        ramster_remote_flush_object((struct flushlist_node *)u);
                        break;
                default:
                        BUG();
                }
        }
out:
        return;
}

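/*
 * Delayed-work handler: if a remotification target node has been set,
 * alternately drain queued remote flushes and remotify batches of
 * ephemeral and/or persistent pages, then requeue itself to run again
 * in about a second.
 */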
static void ramster_remotify_process(struct work_struct *work)
{
        static bool remotify_in_progress;
        int i;

        BUG_ON(irqs_disabled());
        if (remotify_in_progress)
                goto requeue;
        if (ramster_remote_target_nodenum == -1)
                goto requeue;
        remotify_in_progress = true;
        if (use_cleancache && ramster_eph_remotify_enable) {
                for (i = 0; i < 100; i++) {
                        zcache_do_remotify_flushes();
                        (void)ramster_remotify_pageframe(true);
                }
        }
        if (use_frontswap && ramster_pers_remotify_enable) {
                for (i = 0; i < 100; i++) {
                        zcache_do_remotify_flushes();
                        (void)ramster_remotify_pageframe(false);
                }
        }
        remotify_in_progress = false;
requeue:
        ramster_remotify_queue_delayed_work(HZ);
}

void ramster_remotify_init(void)
{
        unsigned long n = 60UL;

        ramster_remotify_workqueue =
                create_singlethread_workqueue("ramster_remotify");
        ramster_remotify_queue_delayed_work(n * HZ);
}

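/*
 * sysfs "manual_node_up": reading shows which node numbers have been
 * manually brought up; writing a node number marks that node up and
 * notifies the r2net heartbeat code.
 */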
static ssize_t ramster_manual_node_up_show(struct kobject *kobj,
                                struct kobj_attribute *attr, char *buf)
{
        int i;
        char *p = buf;

        for (i = 0; i < MANUAL_NODES; i++)
                if (ramster_nodes_manual_up[i])
                        p += sprintf(p, "%d ", i);
        p += sprintf(p, "\n");
        return p - buf;
}

static ssize_t ramster_manual_node_up_store(struct kobject *kobj,
                struct kobj_attribute *attr, const char *buf, size_t count)
{
        int err;
        unsigned long node_num;

        err = kstrtoul(buf, 10, &node_num);
        if (err) {
                pr_err("ramster: bad strtoul?\n");
                return -EINVAL;
        }
        if (node_num >= MANUAL_NODES) {
                pr_err("ramster: bad node_num=%lu?\n", node_num);
                return -EINVAL;
        }
        if (ramster_nodes_manual_up[node_num]) {
                pr_err("ramster: node %d already up, ignoring\n",
                                                        (int)node_num);
        } else {
                ramster_nodes_manual_up[node_num] = true;
                r2net_hb_node_up_manual((int)node_num);
        }
        return count;
}

static struct kobj_attribute ramster_manual_node_up_attr = {
        .attr = { .name = "manual_node_up", .mode = 0644 },
        .show = ramster_manual_node_up_show,
        .store = ramster_manual_node_up_store,
};

static ssize_t ramster_remote_target_nodenum_show(struct kobject *kobj,
                                struct kobj_attribute *attr, char *buf)
{
        if (ramster_remote_target_nodenum == -1)
                return sprintf(buf, "unset\n");
        else
                return sprintf(buf, "%d\n", ramster_remote_target_nodenum);
}

static ssize_t ramster_remote_target_nodenum_store(struct kobject *kobj,
                struct kobj_attribute *attr, const char *buf, size_t count)
{
        int err;
        unsigned long node_num;

        err = kstrtoul(buf, 10, &node_num);
        if (err) {
                pr_err("ramster: bad strtoul?\n");
                return -EINVAL;
        } else if (node_num == -1UL) {
                pr_err("ramster: disabling all remotification, "
                        "data may still reside on remote nodes however\n");
                return -EINVAL;
        } else if (node_num >= MANUAL_NODES) {
                pr_err("ramster: bad node_num=%lu?\n", node_num);
                return -EINVAL;
        } else if (!ramster_nodes_manual_up[node_num]) {
                pr_err("ramster: node %d not up, ignoring setting "
                        "of remotification target\n", (int)node_num);
        } else if (r2net_remote_target_node_set((int)node_num) >= 0) {
                pr_info("ramster: node %d set as remotification target\n",
                                (int)node_num);
                ramster_remote_target_nodenum = (int)node_num;
        } else {
                pr_err("ramster: bad num to node node_num=%d?\n",
                                (int)node_num);
                return -EINVAL;
        }
        return count;
}

static struct kobj_attribute ramster_remote_target_nodenum_attr = {
        .attr = { .name = "remote_target_nodenum", .mode = 0644 },
        .show = ramster_remote_target_nodenum_show,
        .store = ramster_remote_target_nodenum_store,
};

#define RAMSTER_SYSFS_RO(_name) \
        static ssize_t ramster_##_name##_show(struct kobject *kobj, \
                                struct kobj_attribute *attr, char *buf) \
        { \
                return sprintf(buf, "%lu\n", ramster_##_name); \
        } \
        static struct kobj_attribute ramster_##_name##_attr = { \
                .attr = { .name = __stringify(_name), .mode = 0444 }, \
                .show = ramster_##_name##_show, \
        }

#define RAMSTER_SYSFS_RW(_name) \
        static ssize_t ramster_##_name##_show(struct kobject *kobj, \
                                struct kobj_attribute *attr, char *buf) \
        { \
                return sprintf(buf, "%lu\n", ramster_##_name); \
        } \
        static ssize_t ramster_##_name##_store(struct kobject *kobj, \
                struct kobj_attribute *attr, const char *buf, size_t count) \
        { \
                int err; \
                unsigned long enable; \
                err = kstrtoul(buf, 10, &enable); \
                if (err) \
                        return -EINVAL; \
                ramster_##_name = enable; \
                return count; \
        } \
        static struct kobj_attribute ramster_##_name##_attr = { \
                .attr = { .name = __stringify(_name), .mode = 0644 }, \
                .show = ramster_##_name##_show, \
                .store = ramster_##_name##_store, \
        }

#define RAMSTER_SYSFS_RO_ATOMIC(_name) \
        static ssize_t ramster_##_name##_show(struct kobject *kobj, \
                                struct kobj_attribute *attr, char *buf) \
        { \
                return sprintf(buf, "%d\n", atomic_read(&ramster_##_name)); \
        } \
        static struct kobj_attribute ramster_##_name##_attr = { \
                .attr = { .name = __stringify(_name), .mode = 0444 }, \
                .show = ramster_##_name##_show, \
        }

RAMSTER_SYSFS_RO(interface_revision);
RAMSTER_SYSFS_RO_ATOMIC(remote_pers_pages);
RAMSTER_SYSFS_RW(pers_remotify_enable);
RAMSTER_SYSFS_RW(eph_remotify_enable);

static struct attribute *ramster_attrs[] = {
        &ramster_interface_revision_attr.attr,
        &ramster_remote_pers_pages_attr.attr,
        &ramster_manual_node_up_attr.attr,
        &ramster_remote_target_nodenum_attr.attr,
        &ramster_pers_remotify_enable_attr.attr,
        &ramster_eph_remotify_enable_attr.attr,
        NULL,
};

static struct attribute_group ramster_attr_group = {
        .attrs = ramster_attrs,
        .name = "ramster",
};

/*
 * frontswap selfshrinking
 */

/* In HZ, controls frequency of worker invocation. */
static unsigned int selfshrink_interval __read_mostly = 5;
/* Enable/disable with sysfs. */
static bool frontswap_selfshrinking __read_mostly;

static void selfshrink_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process);

#ifndef CONFIG_RAMSTER_MODULE
/* Enable/disable with kernel boot option. */
static bool use_frontswap_selfshrink = true;
#endif

/*
 * The default values for the following parameters were deemed reasonable
 * by experimentation, may be workload-dependent, and can all be
 * adjusted via sysfs.
 */

/* Control rate for frontswap shrinking. Higher hysteresis is slower. */
static unsigned int frontswap_hysteresis __read_mostly = 20;

/*
 * Number of selfshrink worker invocations to wait before observing that
 * frontswap selfshrinking should commence. Note that selfshrinking does
 * not use a separate worker thread.
 */
static unsigned int frontswap_inertia __read_mostly = 3;

/* Countdown to next invocation of frontswap_shrink() */
static unsigned long frontswap_inertia_counter;

/*
 * Invoked by the selfshrink worker thread, uses current number of pages
 * in frontswap (frontswap_curr_pages()), previous status, and control
 * values (hysteresis and inertia) to determine if frontswap should be
 * shrunk and what the new frontswap size should be.  Note that
 * frontswap_shrink is essentially a partial swapoff that immediately
 * transfers pages from the "swap device" (frontswap) back into kernel
 * RAM; despite the name, frontswap "shrinking" is very different from
 * the "shrinker" interface used by the kernel MM subsystem to reclaim
 * memory.
 */
static void frontswap_selfshrink(void)
{
        static unsigned long cur_frontswap_pages;
        static unsigned long last_frontswap_pages;
        static unsigned long tgt_frontswap_pages;

        last_frontswap_pages = cur_frontswap_pages;
        cur_frontswap_pages = frontswap_curr_pages();
        if (!cur_frontswap_pages ||
                        (cur_frontswap_pages > last_frontswap_pages)) {
                frontswap_inertia_counter = frontswap_inertia;
                return;
        }
        if (frontswap_inertia_counter && --frontswap_inertia_counter)
                return;
        if (cur_frontswap_pages <= frontswap_hysteresis)
                tgt_frontswap_pages = 0;
        else
                tgt_frontswap_pages = cur_frontswap_pages -
                        (cur_frontswap_pages / frontswap_hysteresis);
        frontswap_shrink(tgt_frontswap_pages);
}

#ifndef CONFIG_RAMSTER_MODULE
static int __init ramster_nofrontswap_selfshrink_setup(char *s)
{
        use_frontswap_selfshrink = false;
        return 1;
}

__setup("noselfshrink", ramster_nofrontswap_selfshrink_setup);
#endif

static void selfshrink_process(struct work_struct *work)
{
        if (frontswap_selfshrinking && frontswap_enabled) {
                frontswap_selfshrink();
                schedule_delayed_work(&selfshrink_worker,
                        selfshrink_interval * HZ);
        }
}

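/*
 * Per-cpu hotplug helpers: allocate/free the two per-cpu bounce buffers
 * used when remotifying pages, and release any preloaded flushlist_node
 * when a cpu goes down.
 */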
void ramster_cpu_up(int cpu)
{
        unsigned char *p1 = kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT);
        unsigned char *p2 = kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT);

        BUG_ON(!p1 || !p2);
        per_cpu(ramster_remoteputmem1, cpu) = p1;
        per_cpu(ramster_remoteputmem2, cpu) = p2;
}
EXPORT_SYMBOL_GPL(ramster_cpu_up);

void ramster_cpu_down(int cpu)
{
        struct ramster_preload *kp;

        kfree(per_cpu(ramster_remoteputmem1, cpu));
        per_cpu(ramster_remoteputmem1, cpu) = NULL;
        kfree(per_cpu(ramster_remoteputmem2, cpu));
        per_cpu(ramster_remoteputmem2, cpu) = NULL;
        kp = &per_cpu(ramster_preloads, cpu);
        if (kp->flnode) {
                kmem_cache_free(ramster_flnode_cache, kp->flnode);
                kp->flnode = NULL;
        }
}
EXPORT_SYMBOL_GPL(ramster_cpu_down);

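/*
 * Hook the ramster-specific callbacks into the caller's tmem pamops;
 * fields not set here are left as the caller initialized them.
 */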
void ramster_register_pamops(struct tmem_pamops *pamops)
{
        pamops->free_obj = ramster_pampd_free_obj;
        pamops->new_obj = ramster_pampd_new_obj;
        pamops->replace_in_obj = ramster_pampd_replace_in_obj;
        pamops->is_remote = ramster_pampd_is_remote;
        pamops->repatriate = ramster_pampd_repatriate;
        pamops->repatriate_preload = ramster_pampd_repatriate_preload;
}
EXPORT_SYMBOL_GPL(ramster_register_pamops);

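/*
 * Called once during zcache initialization: record which frontends are
 * in use, create the sysfs group and debugfs entries, register the r2net
 * message handlers, set up the flushlist_node cache, and start the
 * selfshrink and remotify workers.
 */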
void ramster_init(bool cleancache, bool frontswap,
                                bool frontswap_exclusive_gets,
                                bool frontswap_selfshrink)
{
        int ret = 0;

        if (cleancache)
                use_cleancache = true;
        if (frontswap)
                use_frontswap = true;
        if (frontswap_exclusive_gets)
                use_frontswap_exclusive_gets = true;
        ramster_debugfs_init();
        ret = sysfs_create_group(mm_kobj, &ramster_attr_group);
        if (ret)
                pr_err("ramster: can't create sysfs for ramster\n");
        (void)r2net_register_handlers();
#ifdef CONFIG_RAMSTER_MODULE
        ret = r2nm_init();
        if (ret)
                pr_err("ramster: can't init r2net\n");
        frontswap_selfshrinking = frontswap_selfshrink;
#else
        frontswap_selfshrinking = use_frontswap_selfshrink;
#endif
        INIT_LIST_HEAD(&ramster_rem_op_list);
        ramster_flnode_cache = kmem_cache_create("ramster_flnode",
                                sizeof(struct flushlist_node), 0, 0, NULL);
        if (frontswap_selfshrinking) {
                pr_info("ramster: Initializing frontswap selfshrink driver.\n");
                schedule_delayed_work(&selfshrink_worker,
                                        selfshrink_interval * HZ);
        }
        ramster_remotify_init();
}
EXPORT_SYMBOL_GPL(ramster_init);