Commit 9a293c2d authored by Kirill Smelkov

bigfile/virtmem: Userspace Virtual Memory Manager

Does things similar to what the kernel does - users can mmap file parts into
address space and access them read/write. The manager gets invoked by the
hardware/OS kernel when there is no page loaded for a read, or when a
previously read-only page is being written to.

In addition to the features provided in the kernel, it supports storing
changes back in a transactional way (see fileh_dirty_writeout()) and can
potentially use huge pages for mappings (though this is currently TODO)
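
A rough sketch of how a client is expected to drive it (error handling
elided; `my_ops` stands for a client-provided bigfile_ops table with
loadblk/storeblk, `ram` comes from ram_new(); current code assumes
blksize == pagesize):

    BigFile f = { .blksize = ram->pagesize, .file_ops = &my_ops };
    BigFileH fh;
    VMA vma;

    pagefault_init();                 /* install SIGSEGV handler */
    fileh_open(&fh, &f, ram);         /* handle = local view of file + ram */
    fileh_mmap(&vma, &fh, 0, 4);      /* map file pages [0, 4) into memory */
    ((char *)vma.addr_start)[0] = 1;  /* pagefault -> page loaded, marked dirty */
    fileh_dirty_writeout(&fh, WRITEOUT_STORE | WRITEOUT_MARKSTORED);
    vma_unmap(&vma);
    fileh_close(&fh);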
parent 9065e2b9
@@ -62,6 +62,19 @@ LOADLIBES=lib/bug.c lib/utils.c 3rdparty/ccan/ccan/tap/tap.c
TESTS := $(patsubst %.c,%,$(wildcard bigfile/tests/test_*.c))
test : test.t test.fault test.asan test.tsan test.vgmem test.vghel test.vgdrd
# TODO move XFAIL markers into *.c
# TSAN fails on test_virtmem (http://code.google.com/p/thread-sanitizer/issues/detail?id=75)
# NOTE the bug was fixed in compiler-rt 20140917 (6afe775d)
# -> we can remove this xfail when the fix propagates to gcc/clang release
XFAIL_bigfile/tests/test_virtmem.tsanrun := y
# Before calling our SIGSEGV handler, Memcheck first reports "invalid read|write" error.
# A solution could be to tell memcheck via VALGRIND_MAKE_MEM_DEFINED that VMA
# address space is ok to access _before_ handling pagefault.
# http://valgrind.org/docs/manual/mc-manual.html#mc-manual.clientreqs
XFAIL_bigfile/tests/test_virtmem.vgmemrun := y
# extract what goes after RUNWITH: marker from command source, or empty if no marker
runwith = $(shell grep -oP '(?<=^// RUNWITH: ).*' $(basename $1).c)
...
@@ -24,6 +24,10 @@
 * read/write, and tail to vma_on_pagefault().
 */
#include <wendelin/bigfile/virtmem.h>
#include <wendelin/bigfile/file.h>
#include <wendelin/bigfile/ram.h>
#include <wendelin/bigfile/pagemap.h>
#include <wendelin/bug.h>
#include <signal.h>
@@ -44,6 +48,7 @@ static void on_pagefault(int sig, siginfo_t *si, void *_uc)
{
struct ucontext *uc = _uc;
unsigned write;
VMA *vma;
BUG_ON(sig != SIGSEGV);
BUG_ON(si->si_signo != SIGSEGV);
@@ -63,8 +68,9 @@ static void on_pagefault(int sig, siginfo_t *si, void *_uc)
// XXX locking
/* (1) addr -> vma ;lookup VMA covering faulting memory address */
-// TODO
-goto dont_handle;
+vma = virt_lookup_vma(si->si_addr);
+if (!vma)
+goto dont_handle; /* fault outside registered file slices */
/* now, since we found faulting address in registered memory areas, we know
 * we should serve this pagefault. */
@@ -76,7 +82,7 @@ static void on_pagefault(int sig, siginfo_t *si, void *_uc)
/* save/restore errno XXX & the like ? */
int save_errno = errno;
-// TODO handle pagefault at si->si_addr / write
+vma_on_pagefault(vma, (uintptr_t)si->si_addr, write);
errno = save_errno;
...
@@ -21,7 +21,9 @@
 */
#include <wendelin/bigfile/ram.h>
#include <wendelin/bigfile/file.h>
#include <wendelin/bigfile/virtmem.h>
#include <wendelin/bigfile/pagemap.h>
#include <wendelin/utils.h>
#include <wendelin/bug.h>
...
@@ -21,6 +21,7 @@
// XXX better link with it
#include "../ram.c"
#include "../pagemap.c"
#include "../virtmem.c"
#include "../ram_shmfs.c"
...
/* Wendelin.bigfile | virtual memory tests
* Copyright (C) 2014-2015 Nexedi SA and Contributors.
* Kirill Smelkov <kirr@nexedi.com>
*
* This program is free software: you can Use, Study, Modify and Redistribute
* it under the terms of the GNU General Public License version 3, or (at your
* option) any later version, as published by the Free Software Foundation.
*
* You can also Link and Combine this program with other software covered by
* the terms of any of the Open Source Initiative approved licenses and Convey
* the resulting work. Corresponding source of such a combination shall include
* the source code for all other software used.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See COPYING file for full licensing terms.
*/
// XXX better link with it
#include "../virtmem.c"
#include "../pagemap.c"
#include "../ram.c"
#include "../ram_shmfs.c"
#include "../pagefault.c"
#include <ccan/tap/tap.h>
#include <setjmp.h>
#include <signal.h>
#include <errno.h>
#include "../../t/t_utils.h"
#include "../../t/t_utils.c"
void test_vmamap()
{
VMA vma1, vma2, vma3;
vma1.addr_start = 0x1000;
vma1.addr_stop = 0x2000;
vma2.addr_start = 0x2000;
vma2.addr_stop = 0x3000;
vma3.addr_start = 0x3000;
vma3.addr_stop = 0x4000;
VMA *L(uintptr_t addr) { return virt_lookup_vma((void *)addr); }
diag("Testing vmamap");
ok1(list_empty(&vma_list));
ok1(!L(0));
ok1(!L(0x1000-1));
ok1(!L(0x1000));
ok1(!L(0x1800));
ok1(!L(0x2000-1));
ok1(!L(0x2000));
virt_register_vma(&vma3);
ok1(!L(0x3000-1));
ok1( L(0x3000) == &vma3 );
ok1( L(0x3800) == &vma3 );
ok1( L(0x4000-1) == &vma3 );
ok1(!L(0x4000));
virt_register_vma(&vma1);
ok1(!L(0x1000-1));
ok1( L(0x1000) == &vma1 );
ok1( L(0x1800) == &vma1 );
ok1( L(0x2000-1) == &vma1 );
ok1(!L(0x2000));
ok1(!L(0x3000-1));
ok1( L(0x3000) == &vma3 );
ok1( L(0x3800) == &vma3 );
ok1( L(0x4000-1) == &vma3 );
ok1(!L(0x4000));
virt_register_vma(&vma2);
ok1(!L(0x1000-1));
ok1( L(0x1000) == &vma1 );
ok1( L(0x1800) == &vma1 );
ok1( L(0x2000-1) == &vma1 );
ok1( L(0x2000) == &vma2 );
ok1( L(0x2800) == &vma2 );
ok1( L(0x3000-1) == &vma2);
ok1( L(0x3000) == &vma3 );
ok1( L(0x3800) == &vma3 );
ok1( L(0x4000-1) == &vma3 );
ok1(!L(0x4000));
virt_unregister_vma(&vma3);
ok1(!L(0x1000-1));
ok1( L(0x1000) == &vma1 );
ok1( L(0x1800) == &vma1 );
ok1( L(0x2000-1) == &vma1 );
ok1( L(0x2000) == &vma2 );
ok1( L(0x2800) == &vma2 );
ok1( L(0x3000-1) == &vma2);
ok1(!L(0x3000));
ok1(!L(0x3800));
ok1(!L(0x4000-1));
ok1(!L(0x4000));
virt_register_vma(&vma3);
ok1(!L(0x1000-1));
ok1( L(0x1000) == &vma1 );
ok1( L(0x1800) == &vma1 );
ok1( L(0x2000-1) == &vma1 );
ok1( L(0x2000) == &vma2 );
ok1( L(0x2800) == &vma2 );
ok1( L(0x3000-1) == &vma2);
ok1( L(0x3000) == &vma3 );
ok1( L(0x3800) == &vma3 );
ok1( L(0x4000-1) == &vma3 );
ok1(!L(0x4000));
virt_unregister_vma(&vma2);
ok1(!L(0x1000-1));
ok1( L(0x1000) == &vma1 );
ok1( L(0x1800) == &vma1 );
ok1( L(0x2000-1) == &vma1 );
ok1(!L(0x2000));
ok1(!L(0x2800));
ok1(!L(0x3000-1));
ok1( L(0x3000) == &vma3 );
ok1( L(0x3800) == &vma3 );
ok1( L(0x4000-1) == &vma3 );
ok1(!L(0x4000));
virt_unregister_vma(&vma1);
ok1(!L(0x1000-1));
ok1(!L(0x1000));
ok1(!L(0x1800));
ok1(!L(0x2000-1));
ok1(!L(0x2000));
ok1(!L(0x2800));
ok1(!L(0x3000-1));
ok1( L(0x3000) == &vma3 );
ok1( L(0x3800) == &vma3 );
ok1( L(0x4000-1) == &vma3 );
ok1(!L(0x4000));
virt_unregister_vma(&vma3);
ok1(!L(0x1000-1));
ok1(!L(0x1000));
ok1(!L(0x1800));
ok1(!L(0x2000-1));
ok1(!L(0x2000));
ok1(!L(0x2800));
ok1(!L(0x3000-1));
ok1(!L(0x3000));
ok1(!L(0x3800));
ok1(!L(0x4000-1));
ok1(!L(0x4000));
ok1(list_empty(&vma_list));
}
/* file that reads #blk on loadblk(blk) */
struct BigFileIdentity {
BigFile;
};
typedef struct BigFileIdentity BigFileIdentity;
int fileid_loadblk(BigFile *file, blk_t blk, void *buf)
{
blk_t *bbuf = buf;
size_t bsize = file->blksize / sizeof(*bbuf);
while (bsize--)
*bbuf++ = blk;
return 0;
}
static const struct bigfile_ops fileid_ops = {
.loadblk = fileid_loadblk,
.storeblk = NULL, // XXX
.release = NULL, // XXX
};
/* tell ASAN we are using own SIGSEGV handler in MUST_FAULT */
const char *__asan_default_options()
{
return "allow_user_segv_handler=1";
}
/* tell TSAN we are OK with calling async-sig-unsafe functions from sync SIGSEGV */
const char *__tsan_default_options()
{
return "report_signal_unsafe=0";
}
/* whether appropriate page of vma is mapped */
int M(VMA *vma, pgoff_t idx) { return bitmap_test_bit(vma->page_ismappedv, idx); }
/* check that
*
* - page != NULL,
* - page is the same as fileh->pagemap[pgoffset],
* - with expected page->state and page->refcnt.
*/
#define __CHECK_PAGE(page, fileh, pgoffset, pgstate, pgrefcnt) do { \
ok1(page); \
ok1(page == pagemap_get(&(fileh)->pagemap, (pgoffset))); \
ok1((page)->state == (pgstate)); \
ok1((page)->refcnt == (pgrefcnt)); \
} while (0)
/* check that fileh->pagemap[pgoffset] is empty */
#define __CHECK_NOPAGE(fileh, pgoffset) do { \
ok1(!pagemap_get(&(fileh)->pagemap, (pgoffset))); \
} while (0)
/* test access to file mappings via explicit vma_on_pagefault() calls */
void test_file_access_synthetic(void)
{
RAM *ram, *ram0;
BigFileH fh_struct, *fh = &fh_struct;
VMA vma_struct, *vma = &vma_struct;
Page *page0, *page1, *page2, *page3;
blk_t *b0, *b2;
size_t PS, PSb;
int err;
/* MUST_FAULT(code) - checks that code faults */
sigjmp_buf fault_jmp;
volatile int fault_expected = 0;
void sigfault_handler(int sig) {
if (!fault_expected) {
diag("Unexpected fault - abort");
abort();
}
/* just return from sighandler to proper place */
fault_expected = 0;
siglongjmp(fault_jmp, 1);
}
#define MUST_FAULT(code) do { \
fault_expected = 1; \
if (!sigsetjmp(fault_jmp, 1)) { \
code; /* should pagefault -> sighandler does longjmp */ \
fail("'" #code "' did not cause fault"); \
} \
else { \
pass("'" #code "' faulted"); \
} \
} while (0)
diag("Testing file access (synthetic)");
struct sigaction act, saveact;
act.sa_handler = sigfault_handler;
act.sa_flags = 0;
ok1(!sigemptyset(&act.sa_mask));
ok1(!sigaction(SIGSEGV, &act, &saveact));
/* ram limited to exactly 3 pages (so that we know reclaim is triggered
 * exactly when we allocate more) */
ram0 = ram_new(NULL, NULL);
ok1(ram0);
ram = ram_limited_new(ram0, 3);
ok1(ram);
PS = ram->pagesize;
PSb = PS / sizeof(blk_t); /* page size in blk_t units */
/* ensure we are starting from new ram */
ok1(list_empty(&ram->lru_list));
/* setup id file */
struct bigfile_ops x_ops = {.loadblk = fileid_loadblk};
BigFileIdentity fileid = {
.blksize = ram->pagesize, /* artificially blksize = pagesize */
.file_ops = &x_ops,
};
err = fileh_open(fh, &fileid, ram);
ok1(!err);
ok1(list_empty(&fh->mmaps));
/* implicitly use fileh=fh */
#define CHECK_PAGE(page, pgoffset, pgstate, pgrefcnt) \
__CHECK_PAGE(page, fh, pgoffset, pgstate, pgrefcnt)
#define CHECK_NOPAGE(pgoffset) __CHECK_NOPAGE(fh, pgoffset)
err = fileh_mmap(vma, fh, 100, 4);
ok1(!err);
ok1(fh->mmaps.next == &vma->same_fileh);
ok1(vma->same_fileh.next == &fh->mmaps);
/* all pages initially unmapped
* M R W */
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1(!M(vma, 2)); MUST_FAULT( B(vma, 2*PSb) ); MUST_FAULT( B(vma, 2*PSb) = 12 );
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1(!fh->dirty);
CHECK_NOPAGE( 100 );
CHECK_NOPAGE( 101 );
CHECK_NOPAGE( 102 );
CHECK_NOPAGE( 103 );
ok1(list_empty(&ram->lru_list));
/* simulate read access to page[0] - it should load it */
diag("read page[0]");
vma_on_pagefault(vma, vma->addr_start + 0*PS, 0);
ok1( M(vma, 0)); B(vma, 0*PSb); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1(!M(vma, 2)); MUST_FAULT( B(vma, 2*PSb) ); MUST_FAULT( B(vma, 2*PSb) = 12 );
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1(!fh->dirty);
page0 = pagemap_get(&fh->pagemap, 100);
CHECK_PAGE (page0, 100, PAGE_LOADED, 1);
CHECK_NOPAGE( 101 );
CHECK_NOPAGE( 102 );
CHECK_NOPAGE( 103 );
ok1(B(vma, 0*PSb + 0) == 100);
ok1(B(vma, 0*PSb + 1) == 100);
ok1(B(vma, 0*PSb + PSb - 1) == 100);
ok1(ram->lru_list.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* simulate write access to page[2] - it should load it and mark page dirty */
diag("write page[2]");
vma_on_pagefault(vma, vma->addr_start + 2*PS, 1);
ok1( M(vma, 0)); B(vma, 0*PSb); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
page2 = pagemap_get(&fh->pagemap, 102);
CHECK_PAGE (page0, 100, PAGE_LOADED, 1);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_NOPAGE( 103 );
ok1(B(vma, 0*PSb + 0) == 100);
ok1(B(vma, 0*PSb + 1) == 100);
ok1(B(vma, 0*PSb + PSb - 1) == 100);
ok1(B(vma, 2*PSb + 0) == 12); /* overwritten at fault write check */
ok1(B(vma, 2*PSb + 1) == 102);
ok1(B(vma, 2*PSb + PSb - 1) == 102);
ok1(ram->lru_list.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* read access to page[3] - load */
diag("read page[3]");
vma_on_pagefault(vma, vma->addr_start + 3*PS, 0);
ok1( M(vma, 0)); B(vma, 0*PSb); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1( M(vma, 3)); B(vma, 3*PSb); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
page3 = pagemap_get(&fh->pagemap, 103);
CHECK_PAGE (page0, 100, PAGE_LOADED, 1);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_PAGE (page3, 103, PAGE_LOADED, 1);
ok1(B(vma, 0*PSb + 0) == 100);
ok1(B(vma, 0*PSb + 1) == 100);
ok1(B(vma, 0*PSb + PSb - 1) == 100);
ok1(B(vma, 2*PSb + 0) == 12);
ok1(B(vma, 2*PSb + 1) == 102);
ok1(B(vma, 2*PSb + PSb - 1) == 102);
ok1(B(vma, 3*PSb + 0) == 103);
ok1(B(vma, 3*PSb + 1) == 103);
ok1(B(vma, 3*PSb + PSb - 1) == 103);
ok1(ram->lru_list.prev == &page3->lru);
ok1(page3->lru.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* write access to page[0] - upgrade loaded -> dirty */
diag("write page[0]");
vma_on_pagefault(vma, vma->addr_start + 0*PS, 1);
ok1( M(vma, 0)); B(vma, 0*PSb); B(vma, 0*PSb) = 10;
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1( M(vma, 3)); B(vma, 3*PSb); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 1);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_PAGE (page3, 103, PAGE_LOADED, 1);
ok1(B(vma, 0*PSb + 0) == 10);
ok1(B(vma, 0*PSb + 1) == 100);
ok1(B(vma, 0*PSb + PSb - 1) == 100);
ok1(B(vma, 2*PSb + 0) == 12);
ok1(B(vma, 2*PSb + 1) == 102);
ok1(B(vma, 2*PSb + PSb - 1) == 102);
ok1(B(vma, 3*PSb + 0) == 103);
ok1(B(vma, 3*PSb + 1) == 103);
ok1(B(vma, 3*PSb + PSb - 1) == 103);
ok1(ram->lru_list.prev == &page0->lru); /* page0 became MRU */
ok1(page0->lru.prev == &page3->lru);
ok1(page3->lru.prev == &page2->lru);
ok1(page2->lru.prev == &ram->lru_list);
/* read page[1]
 *
 * as 3 pages were already allocated, it should trigger reclaim (we set up
 * RAMLimited with 3 allocated pages max). page[3] will be evicted, as it
 * is the only PAGE_LOADED page. */
diag("read page[1]");
vma_on_pagefault(vma, vma->addr_start + 1*PS, 0);
ok1( M(vma, 0)); B(vma, 0*PSb); B(vma, 0*PSb) = 10;
ok1( M(vma, 1)); B(vma, 1*PSb); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
page1 = pagemap_get(&fh->pagemap, 101);
page3 = pagemap_get(&fh->pagemap, 103);
ok1(!page3);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 1);
CHECK_PAGE (page1, 101, PAGE_LOADED, 1);
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_NOPAGE( 103 );
ok1(B(vma, 0*PSb + 0) == 10);
ok1(B(vma, 0*PSb + 1) == 100);
ok1(B(vma, 0*PSb + PSb - 1) == 100);
ok1(B(vma, 1*PSb + 0) == 101);
ok1(B(vma, 1*PSb + 1) == 101);
ok1(B(vma, 1*PSb + PSb - 1) == 101);
ok1(B(vma, 2*PSb + 0) == 12);
ok1(B(vma, 2*PSb + 1) == 102);
ok1(B(vma, 2*PSb + PSb - 1) == 102);
ok1(ram->lru_list.prev == &page1->lru);
ok1(page1->lru.prev == &page0->lru);
ok1(page0->lru.prev == &page2->lru);
ok1(page2->lru.prev == &ram->lru_list);
/* now explicit reclaim - should evict page[1] (the only PAGE_LOADED page) */
diag("reclaim");
ok1(1 == ram_reclaim(ram) );
ok1( M(vma, 0)); B(vma, 0*PSb); B(vma, 0*PSb) = 10;
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
page1 = pagemap_get(&fh->pagemap, 101);
ok1(!page1);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 1);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_NOPAGE( 103 );
ok1(B(vma, 0*PSb + 0) == 10);
ok1(B(vma, 0*PSb + 1) == 100);
ok1(B(vma, 0*PSb + PSb - 1) == 100);
ok1(B(vma, 2*PSb + 0) == 12);
ok1(B(vma, 2*PSb + 1) == 102);
ok1(B(vma, 2*PSb + PSb - 1) == 102);
/* page[1] went away */
ok1(ram->lru_list.prev == &page0->lru);
ok1(page0->lru.prev == &page2->lru);
ok1(page2->lru.prev == &ram->lru_list);
/* unmap vma - dirty pages should stay in fh->pagemap and memory should
* not be forgotten */
diag("vma_unmap");
vma_unmap(vma);
ok1(list_empty(&fh->mmaps));
ok1( fh->dirty);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 0);
CHECK_NOPAGE( 103 );
ok1(ram->lru_list.prev == &page0->lru);
ok1(page0->lru.prev == &page2->lru);
ok1(page2->lru.prev == &ram->lru_list);
b0 = page_mmap(page0, NULL, PROT_READ);
ok1(b0);
b2 = page_mmap(page2, NULL, PROT_READ);
ok1(b2);
ok1(b0[0] == 10);
ok1(b0[1] == 100);
ok1(b0[PSb - 1] == 100);
ok1(b2[0] == 12);
ok1(b2[1] == 102);
ok1(b2[PSb - 1] == 102);
xmunmap(b0, PS);
xmunmap(b2, PS);
/* map vma back - dirty pages should be there but not mapped to vma */
diag("vma mmap again");
err = fileh_mmap(vma, fh, 100, 4);
ok1(!err);
ok1(fh->mmaps.next == &vma->same_fileh);
ok1(vma->same_fileh.next == &fh->mmaps);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1(!M(vma, 2)); MUST_FAULT( B(vma, 2*PSb) ); MUST_FAULT( B(vma, 2*PSb) = 12 );
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 0);
CHECK_NOPAGE( 103 );
ok1(ram->lru_list.prev == &page0->lru);
ok1(page0->lru.prev == &page2->lru);
ok1(page2->lru.prev == &ram->lru_list);
/* read access to page[2] - should map it R/W - the page is in PAGE_DIRTY state */
diag("read page[2]");
vma_on_pagefault(vma, vma->addr_start + 2*PS, 0);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_NOPAGE( 103 );
ok1(ram->lru_list.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* discard - changes should go away */
diag("discard");
fileh_dirty_discard(fh);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1(!M(vma, 2)); MUST_FAULT( B(vma, 2*PSb) ); MUST_FAULT( B(vma, 2*PSb) = 12 );
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1(!fh->dirty);
CHECK_PAGE (page0, 100, PAGE_EMPTY, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_EMPTY, 0);
CHECK_NOPAGE( 103 );
ok1(ram->lru_list.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* writeout in 3 variants - STORE, MARK, STORE+MARK */
diag("writeout");
/* storeblk which just remembers which blk was written out */
blk_t blkv[16];
size_t blkv_len;
int storeblk_trace(BigFile *file, blk_t blk, const void *buf)
{
ok1(blkv_len < ARRAY_SIZE(blkv));
blkv[blkv_len++] = blk;
return 0;
}
x_ops.storeblk = storeblk_trace;
/* read page[3] (so that we have 1 PAGE_LOADED besides PAGE_DIRTY pages) */
ok1(!pagemap_get(&fh->pagemap, 103));
vma_on_pagefault(vma, vma->addr_start + 3*PS, 0);
page3 = pagemap_get(&fh->pagemap, 103);
ok1(page3);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1(!M(vma, 2)); MUST_FAULT( B(vma, 2*PSb) ); MUST_FAULT( B(vma, 2*PSb) = 12 );
ok1( M(vma, 3)); B(vma, 3*PSb); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1(!fh->dirty);
CHECK_PAGE (page0, 100, PAGE_EMPTY, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_EMPTY, 0);
CHECK_PAGE (page3, 103, PAGE_LOADED, 1);
ok1(ram->lru_list.prev == &page3->lru);
ok1(page3->lru.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* prepare state (2 dirty pages, only 1 mapped) */
void mkdirty2() {
vma_on_pagefault(vma, vma->addr_start + 0*PS, 1); /* write page[0] */
vma_on_pagefault(vma, vma->addr_start + 2*PS, 1); /* write page[2] */
vma_unmap(vma);
err = fileh_mmap(vma, fh, 100, 4);
ok1(!err);
vma_on_pagefault(vma, vma->addr_start + 2*PS, 0);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_PAGE (page3, 103, PAGE_LOADED, 0);
ok1(ram->lru_list.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &page3->lru);
ok1(page3->lru.prev == &ram->lru_list);
}
diag("writeout (store)");
mkdirty2();
blkv_len = 0;
ok1(!fileh_dirty_writeout(fh, WRITEOUT_STORE));
ok1(blkv_len == 2);
ok1(blkv[0] == 100);
ok1(blkv[1] == 102);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); B(vma, 2*PSb) = 12;
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1( fh->dirty);
CHECK_PAGE (page0, 100, PAGE_DIRTY, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
CHECK_PAGE (page3, 103, PAGE_LOADED, 0);
diag("writeout (mark)");
blkv_len = 0;
ok1(!fileh_dirty_writeout(fh, WRITEOUT_MARKSTORED));
ok1(blkv_len == 0);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); MUST_FAULT( B(vma, 2*PSb) = 12 );
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1(!fh->dirty);
CHECK_PAGE (page0, 100, PAGE_LOADED, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_LOADED, 1);
CHECK_PAGE (page3, 103, PAGE_LOADED, 0);
ok1(ram->lru_list.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &page3->lru);
ok1(page3->lru.prev == &ram->lru_list);
diag("writeout (store+mark)");
mkdirty2();
blkv_len = 0;
ok1(!fileh_dirty_writeout(fh, WRITEOUT_STORE | WRITEOUT_MARKSTORED));
ok1(blkv_len == 2);
ok1(blkv[0] == 100);
ok1(blkv[1] == 102);
ok1(!M(vma, 0)); MUST_FAULT( B(vma, 0*PSb) ); MUST_FAULT( B(vma, 0*PSb) = 10 );
ok1(!M(vma, 1)); MUST_FAULT( B(vma, 1*PSb) ); MUST_FAULT( B(vma, 1*PSb) = 11 );
ok1( M(vma, 2)); B(vma, 2*PSb); MUST_FAULT( B(vma, 2*PSb) = 12 );
ok1(!M(vma, 3)); MUST_FAULT( B(vma, 3*PSb) ); MUST_FAULT( B(vma, 3*PSb) = 13 );
ok1(!fh->dirty);
CHECK_PAGE (page0, 100, PAGE_LOADED, 0);
CHECK_NOPAGE( 101 );
CHECK_PAGE (page2, 102, PAGE_LOADED, 1);
CHECK_PAGE (page3, 103, PAGE_LOADED, 0);
ok1(ram->lru_list.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &page3->lru);
ok1(page3->lru.prev == &ram->lru_list);
diag("fileh_close");
/* dirty some pages again - so that fileh_close is tested with some pages still dirty */
mkdirty2();
vma_unmap(vma);
/* ensure pages stay in ram lru with expected state */
ok1(ram->lru_list.prev == &page2->lru); ok1(page2->state == PAGE_DIRTY);
ok1(page2->lru.prev == &page0->lru); ok1(page0->state == PAGE_DIRTY);
ok1(page0->lru.prev == &page3->lru); ok1(page3->state == PAGE_LOADED);
ok1(page3->lru.prev == &ram->lru_list);
fileh_close(fh);
/* pages associated with fileh should go away after fileh_close() */
ok1(list_empty(&ram->lru_list));
/* free resources & restore SIGSEGV handler */
ram_close(ram);
ram_close(ram0);
ok1(!sigaction(SIGSEGV, &saveact, NULL));
#undef CHECK_PAGE
#undef CHECK_NOPAGE
}
/* file access via real pagefault
*
* this test verifies that the SIGSEGV pagefault handler works, and only that. Most of
* virtual memory behaviour is more explicitly tested in
* test_file_access_synthetic().
*/
void test_file_access_pagefault()
{
RAM *ram;
BigFileH fh_struct, *fh = &fh_struct;
VMA vma_struct, *vma = &vma_struct;
Page *page0, *page2, *page3;
size_t PS, PSb;
int err;
diag("Testing file access (pagefault)");
// XXX save/restore sigaction ?
ok1(!pagefault_init());
ram = ram_new(NULL,NULL);
ok1(ram);
PS = ram->pagesize;
PSb = PS / sizeof(blk_t); /* page size in blk_t units */
/* ensure we are starting from new ram */
ok1(list_empty(&ram->lru_list));
/* setup id file */
BigFileIdentity fileid = {
.blksize = ram->pagesize, /* artificially blksize = pagesize */
.file_ops = &fileid_ops,
};
err = fileh_open(fh, &fileid, ram);
ok1(!err);
/* implicitly use fileh=fh */
#define CHECK_PAGE(page, pgoffset, pgstate, pgrefcnt) \
__CHECK_PAGE(page, fh, pgoffset, pgstate, pgrefcnt)
#define CHECK_NOPAGE(pgoffset) __CHECK_NOPAGE(fh, pgoffset)
err = fileh_mmap(vma, fh, 100, 4);
ok1(!err);
/* all pages initially unmapped */
ok1(!M(vma, 0)); CHECK_NOPAGE( 100 );
ok1(!M(vma, 1)); CHECK_NOPAGE( 101 );
ok1(!M(vma, 2)); CHECK_NOPAGE( 102 );
ok1(!M(vma, 3)); CHECK_NOPAGE( 103 );
ok1(list_empty(&ram->lru_list));
/* read page[0] */
ok1(B(vma, 0*PSb) == 100);
page0 = pagemap_get(&fh->pagemap, 100);
ok1( M(vma, 0)); CHECK_PAGE (page0, 100, PAGE_LOADED, 1);
ok1(!M(vma, 1)); CHECK_NOPAGE( 101 );
ok1(!M(vma, 2)); CHECK_NOPAGE( 102 );
ok1(!M(vma, 3)); CHECK_NOPAGE( 103 );
ok1(ram->lru_list.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* write to page[2] */
B(vma, 2*PSb) = 12;
page2 = pagemap_get(&fh->pagemap, 102);
ok1( M(vma, 0)); CHECK_PAGE (page0, 100, PAGE_LOADED, 1);
ok1(!M(vma, 1)); CHECK_NOPAGE( 101 );
ok1( M(vma, 2)); CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
ok1(!M(vma, 3)); CHECK_NOPAGE( 103 );
ok1(ram->lru_list.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* read page[3] */
ok1(B(vma, 3*PSb) == 103);
page3 = pagemap_get(&fh->pagemap, 103);
ok1( M(vma, 0)); CHECK_PAGE (page0, 100, PAGE_LOADED, 1);
ok1(!M(vma, 1)); CHECK_NOPAGE( 101 );
ok1( M(vma, 2)); CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
ok1( M(vma, 3)); CHECK_PAGE (page3, 103, PAGE_LOADED, 1);
ok1(ram->lru_list.prev == &page3->lru);
ok1(page3->lru.prev == &page2->lru);
ok1(page2->lru.prev == &page0->lru);
ok1(page0->lru.prev == &ram->lru_list);
/* write to page[0] */
B(vma, 0*PSb) = 10;
ok1( M(vma, 0)); CHECK_PAGE (page0, 100, PAGE_DIRTY, 1);
ok1(!M(vma, 1)); CHECK_NOPAGE( 101 );
ok1( M(vma, 2)); CHECK_PAGE (page2, 102, PAGE_DIRTY, 1);
ok1( M(vma, 3)); CHECK_PAGE (page3, 103, PAGE_LOADED, 1);
ok1(ram->lru_list.prev == &page0->lru); /* page0 became MRU */
ok1(page0->lru.prev == &page3->lru);
ok1(page3->lru.prev == &page2->lru);
ok1(page2->lru.prev == &ram->lru_list);
/* unmap vma */
vma_unmap(vma);
/* free resources */
fileh_close(fh);
// ok1(list_empty(&ram->lru_list));
ram_close(ram);
}
/*
* test that pagefault saves/restores thread state correctly
*/
void test_pagefault_savestate()
{
RAM *ram;
BigFileH fh_struct, *fh = &fh_struct;
VMA vma_struct, *vma = &vma_struct;
int err;
diag("Testing how pagefault handler saves/restores thread state");
// XXX save/restore sigaction ?
ok1(!pagefault_init());
ram = ram_new(NULL,NULL);
ok1(ram);
/* setup bad file */
volatile int loadblk_run;
int badfile_loadblk(BigFile *file, blk_t blk, void *buf)
{
/* we are a bad file - just say everything is ok... */
/* and before that corrupt thread state - to verify that the pagefault
 * handler will restore it. */
errno = 98;
/* Also tell, via loadblk_run, that we were here, so that the test can be
 * sure we actually tried to make things go bad. */
loadblk_run = 1;
return 0;
}
const struct bigfile_ops badfile_ops = {
.loadblk = badfile_loadblk,
};
BigFile f = {
.blksize = ram->pagesize, /* artificial */
.file_ops = &badfile_ops,
};
err = fileh_open(fh, &f, ram);
ok1(!err);
err = fileh_mmap(vma, fh, 0, 1);
ok1(!err);
/* before we touched anything */
errno = 1;
loadblk_run = 0;
ok1(errno == 1);
ok1(!loadblk_run);
/* read page[0] - it should trigger badfile_loadblk() */
ok1(B(vma, 0) == 0);
ok1(loadblk_run);
ok1(errno == 1);
/* free resources */
vma_unmap(vma);
fileh_close(fh);
ram_close(ram);
#undef CHECK_PAGE
#undef CHECK_NOPAGE
}
// TODO test for loadblk that returns -1
int main()
{
tap_fail_callback = abort; // XXX to catch failure immediately
test_vmamap();
test_file_access_synthetic();
test_file_access_pagefault();
test_pagefault_savestate();
return 0;
}
@@ -26,6 +26,10 @@
 */
// XXX better link with it
#include "../virtmem.c"
#include "../pagemap.c"
#include "../ram.c"
#include "../ram_shmfs.c"
#include "../pagefault.c" #include "../pagefault.c"
#include <ccan/tap/tap.h> #include <ccan/tap/tap.h>
...@@ -33,6 +37,8 @@ ...@@ -33,6 +37,8 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "../../t/t_utils.h"
static void prefault()
{
@@ -64,6 +70,115 @@ void fault_write()
}
/* fault in loadblk (= doublefault) */
void fault_in_loadblk()
{
RAM *ram;
BigFileH fh;
VMA vma_struct, *vma = &vma_struct;
size_t PS;
int err;
diag("testing pagefault v.s. fault in loadblk");
// XXX save/restore sigaction ?
ok1(!pagefault_init());
ram = ram_new(NULL,NULL);
ok1(ram);
PS = ram->pagesize;
/* loadblk, simulating error in storage layer, touches memory in vma for
* another blk -> doublefault */
int faulty_loadblk(BigFile *file, blk_t blk, void *buf)
{
/* touch page[1] - should crash here */
b(vma, 1*PS);
return 0;
}
const struct bigfile_ops faulty_ops = {
.loadblk = faulty_loadblk,
};
BigFile f = {
.blksize = ram->pagesize, /* artificial */
.file_ops = &faulty_ops,
};
err = fileh_open(&fh, &f, ram);
ok1(!err);
err = fileh_mmap(vma, &fh, 0, 2);
ok1(!err);
/* touch page[0] - should dive into loadblk and doublefault there */
prefault();
b(vma, 0);
}
/* fault in storeblk (single fault - but should die) */
void fault_in_storeblk()
{
RAM *ram;
BigFileH fh;
VMA vma_struct, *vma = &vma_struct;
size_t PS;
int err;
diag("testing pagefault v.s. fault in storeblk");
// XXX save/restore sigaction ?
ok1(!pagefault_init());
ram = ram_new(NULL,NULL);
ok1(ram);
PS = ram->pagesize;
/* empty loadblk - memory will just stay as it is (all 0) */
int empty_loadblk(BigFile *file, blk_t blk, void *buf)
{ return 0; }
/* storeblk "incorrectly" accesses other protected memory which should be
* catched and SIGSEGV */
int faulty_storeblk(BigFile *file, blk_t blk, const void *buf)
{
/* read page[1] - should crash here */
b(vma, 1*PS);
return 0;
}
const struct bigfile_ops faulty_ops = {
.loadblk = empty_loadblk,
.storeblk = faulty_storeblk,
};
BigFile f = {
.blksize = ram->pagesize, /* artificial */
.file_ops = &faulty_ops,
};
err = fileh_open(&fh, &f, ram);
ok1(!err);
err = fileh_mmap(vma, &fh, 0, 2);
ok1(!err);
/* write to page[0] -> page[0] becomes dirty */
b(vma, 0) = 1;
/* writeout calls storeblk which faults */
prefault();
fileh_dirty_writeout(&fh, WRITEOUT_STORE);
}
static const struct {
const char *name;
void (*test)(void);
@@ -72,6 +187,8 @@ static const struct {
// name func-where-it-dies
{"faultr", fault_read}, // on_pagefault
{"faultw", fault_write}, // on_pagefault
{"fault_loadblk", fault_in_loadblk}, // faulty_loadblk
{"fault_storeblk", fault_in_storeblk}, // faulty_storeblk
};
int main(int argc, char *argv[])
...
@@ -15,14 +15,362 @@
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
*
*
* TODO description
*/
#include <wendelin/bigfile/virtmem.h>
#include <wendelin/bigfile/file.h>
#include <wendelin/bigfile/pagemap.h>
#include <wendelin/bigfile/ram.h>
#include <wendelin/bug.h>
#include <ccan/minmax/minmax.h>
#include <sys/mman.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
static size_t page_size(const Page *page);
static void page_drop_memory(Page *page);
static void *vma_page_addr(VMA *vma, Page *page);
static pgoff_t vma_addr_fpgoffset(VMA *vma, uintptr_t addr);
static int vma_page_ismapped(VMA *vma, Page *page);
static void vma_page_ensure_unmapped(VMA *vma, Page *page);
static void vma_page_ensure_notmappedrw(VMA *vma, Page *page);
#define VIRT_DEBUG 0
#if VIRT_DEBUG
# define TRACE(msg, ...) do { fprintf(stderr, msg, ## __VA_ARGS__); } while (0)
#else
# define TRACE(msg, ...) do {} while(0)
#endif
// TODO client code - block/unblock SIGSEGV so that we do not try to
// incorrectly handle pagefault and just die with coredump
/****************
* OPEN / CLOSE *
****************/
// TODO block SIGSEGV
int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram)
{
bzero(fileh, sizeof(*fileh));
fileh->ramh = ramh_open(ram);
if (!fileh->ramh)
goto out;
fileh->file = file;
INIT_LIST_HEAD(&fileh->mmaps);
pagemap_init(&fileh->pagemap, ilog2_exact(ram->pagesize));
return 0;
out:
return -1;
}
// TODO block SIGSEGV
void fileh_close(BigFileH *fileh)
{
Page *page;
/* it's an error to close fileh with existing mappings */
// XXX implement the same semantics usual files have wrt mmaps - if we release
// fileh, but mapping exists - real fileh release is delayed to last unmap ?
BUG_ON(!list_empty(&fileh->mmaps));
/* drop all pages (dirty or not) associated with this fileh */
pagemap_for_each(page, &fileh->pagemap) {
page_drop_memory(page);
list_del(&page->lru);
bzero(page, sizeof(*page)); /* just in case */
free(page);
}
/* and clear pagemap */
pagemap_clear(&fileh->pagemap);
if (fileh->ramh)
ramh_close(fileh->ramh);
bzero(fileh, sizeof(*fileh));
}
/****************
* MMAP / UNMAP *
****************/
// TODO block SIGSEGV
int fileh_mmap(VMA *vma, BigFileH *fileh, pgoff_t pgoffset, pgoff_t pglen)
{
void *addr;
size_t len = pglen * fileh->ramh->ram->pagesize;
/* alloc vma->page_ismappedv[] */
bzero(vma, sizeof(*vma));
vma->page_ismappedv = bitmap_alloc0(pglen);
if (!vma->page_ismappedv)
goto err;
/* allocate address space somewhere */
addr = mem_valloc(NULL, len);
if (!addr)
goto err;
/* everything allocated - link it up */
vma->addr_start = (uintptr_t)addr;
vma->addr_stop = vma->addr_start + len;
vma->fileh = fileh;
vma->f_pgoffset = pgoffset;
// XXX locking - linking up vs concurrent traversal
// XXX need to init vma->virt_list first?
/* hook vma to fileh->mmaps */
list_add_tail(&vma->same_fileh, &fileh->mmaps);
/* register vma for pagefault handling */
virt_register_vma(vma);
return 0;
err:
free(vma->page_ismappedv);
vma->page_ismappedv = NULL;
return -1;
}
// TODO block SIGSEGV
void vma_unmap(VMA *vma)
{
BigFileH *fileh = vma->fileh;
size_t len = vma->addr_stop - vma->addr_start;
size_t pglen = len / fileh->ramh->ram->pagesize;
int i;
pgoff_t pgoffset;
Page *page;
// XXX locking vs concurrent access
/* unregister from vmamap - so that pagefault handler does not recognize
* this area as valid */
virt_unregister_vma(vma);
/* unlink from fileh.mmaps XXX place ok ? */
list_del_init(&vma->same_fileh);
/* unmap whole vma at once - the kernel unmaps each mapping in turn.
* NOTE error here would mean something is broken */
xmunmap((void *)vma->addr_start, len);
/* scan through mapped-to-this-vma pages and release them */
for (i=0; i < pglen; ++i) {
if (!bitmap_test_bit(vma->page_ismappedv, i))
continue;
pgoffset = vma->f_pgoffset + i;
page = pagemap_get(&fileh->pagemap, pgoffset);
BUG_ON(!page);
page_decref(page);
}
/* free memory and be done */
free(vma->page_ismappedv);
bzero(vma, sizeof(*vma));
}
/**********************
* WRITEOUT / DISCARD *
**********************/
// XXX vs concurrent access in other threads
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags)
{
Page *page;
BigFile *file = fileh->file;
struct list_head *hmmap;
sigset_t mask_segv, save_sigset;
int err = 0;
/* check flags */
if (!(flags & (WRITEOUT_STORE | WRITEOUT_MARKSTORED)) ||
flags & ~(WRITEOUT_STORE | WRITEOUT_MARKSTORED))
return -EINVAL;
// TODO refactor it out of here
/* block SIGSEGV - we are not client and should not access any not-mmapped
* memory -> so on any pagefault we should die, not try to handle it
*
* NOTE sigmask is per-thread. There is no race here wrt other threads
* correctly accessing data. */
xsigemptyset(&mask_segv);
xsigaddset(&mask_segv, SIGSEGV);
xpthread_sigmask(SIG_BLOCK, &mask_segv, &save_sigset);
/* write out dirty pages */
pagemap_for_each(page, &fileh->pagemap) {
/* XXX we scan whole file pages which could be slow
* TODO -> maintain something like separate dirty_list ? */
if (page->state != PAGE_DIRTY)
continue;
/* ->storeblk() */
if (flags & WRITEOUT_STORE) {
TODO (file->blksize != page_size(page));
blk_t blk = page->f_pgoffset; // NOTE assumes blksize = pagesize
void *pagebuf;
int mapped_tmp = 0;
if (!page->refcnt) {
/* page not mmaped anywhere - mmap it temporarily somewhere */
pagebuf = page_mmap(page, NULL, PROT_READ);
TODO(!pagebuf); // XXX err
mapped_tmp = 1;
}
else {
/* some vma mmaps page - use that memory directly */
/* XXX this assumes there is small #vma and is ugly - in general it
* should be simpler via back-pointers from page? */
pagebuf = NULL;
list_for_each(hmmap, &fileh->mmaps) {
VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh);
if (vma_page_ismapped(vma, page)) {
pagebuf = vma_page_addr(vma, page);
break;
}
}
BUG_ON(!pagebuf);
}
err = file->file_ops->storeblk(file, blk, pagebuf);
if (mapped_tmp)
xmunmap(pagebuf, page_size(page));
if (err)
goto out;
}
/* page.state -> PAGE_LOADED and correct mappings RW -> R */
if (flags & WRITEOUT_MARKSTORED) {
page->state = PAGE_LOADED;
list_for_each(hmmap, &fileh->mmaps) {
VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh);
vma_page_ensure_notmappedrw(vma, page);
}
}
}
if (flags & WRITEOUT_MARKSTORED)
fileh->dirty = 0;
out:
xpthread_sigmask(SIG_SETMASK, &save_sigset, NULL);
return err;
}
// TODO block SIGSEGV
// XXX vs concurrent access in other threads
void fileh_dirty_discard(BigFileH *fileh)
{
Page *page;
/* XXX we scan whole file pages which could be slow
* TODO -> maintain something like separate dirty_list ? */
pagemap_for_each(page, &fileh->pagemap)
if (page->state == PAGE_DIRTY)
page_drop_memory(page);
fileh->dirty = 0;
}
/************************
* Lookup VMA by addr *
************************/
/* list of all registered VMA(s) */
static LIST_HEAD(vma_list);
/* protects ^^^ XXX */
//spinlock_t vma_list_lock;
/* lookup VMA covering `addr`. NULL if not found */
// XXX protection against concurrent vma_list updates & lookups
// XXX virt_lookup_vma() operates without taking locks - XXX no -> we'll use spinlock
// (we don't know whether address is ours while calling it) - so it must
// operate correctly lock-free. Updates to vma_list should thus also be done
// carefully.
VMA *virt_lookup_vma(void *addr)
{
uintptr_t uaddr = (uintptr_t)addr;
struct list_head *h;
VMA *vma;
list_for_each(h, &vma_list) {
// XXX -> list_for_each_entry
vma = list_entry(h, typeof(*vma), virt_list);
if (uaddr < vma->addr_stop)
/*
* here: uaddr < vma->addr_stop, and vma->addr_stop is the first such
* addr_stop; whether vma->addr_start <= uaddr still has to be checked
*/
return (vma->addr_start <= uaddr) ? vma : NULL;
}
return NULL; /* not found at all or no overlap */
}
/* register VMA `vma` as covering some file view */
// XXX protection against concurrent updates & lookups
void virt_register_vma(VMA *vma)
{
uintptr_t uaddr = vma->addr_start;
struct list_head *h;
struct VMA *a;
list_for_each(h, &vma_list) {
a = list_entry(h, typeof(*a), virt_list);
if (uaddr < a->addr_stop)
break;
}
/* either before found vma or, if not found, at the end of the list */
list_add_tail(&vma->virt_list, h);
}
/* remove `vma` from VMA registry. `vma` must be registered before */
// XXX protection against concurrent updates & lookups
void virt_unregister_vma(VMA *vma)
{
/* _init - to clear links, just in case */
list_del_init(&vma->virt_list);
}
/*****************************************/
/*
* allocate virtual memory address space
@@ -62,6 +410,193 @@ void *mem_xvalloc(void *addr, size_t len)
}
/*********************
* PAGEFAULT HANDLER *
*********************/
/* pagefault entry when we know request came to our memory area */
void vma_on_pagefault(VMA *vma, uintptr_t addr, int write)
{
pgoff_t pagen;
Page *page;
BigFileH *fileh;
/* continuing on_pagefault() - see (1) there ... */
/* (2) vma, addr -> fileh, pagen ;idx of fileh page covering addr */
fileh = vma->fileh;
pagen = vma_addr_fpgoffset(vma, addr);
/* (3) fileh, pagen -> page (via pagemap) */
page = pagemap_get(&fileh->pagemap, pagen);
/* (4) no page found - allocate new from ram */
while (!page) {
page = ramh_alloc_page(fileh->ramh, pagen);
if (!page) {
/* try to release some memory back to OS */
// XXX do we need and how to distinguish "no ram page" vs "no memory for `struct page`"?
// -> no we don't -- better allocate memory for struct pages for whole RAM at ram setup
if (!ram_reclaim(fileh->ramh->ram))
OOM();
continue;
}
/* ramh set up .ramh, .ramh_pgoffset, .state?
* now setup rest (link to fileh) */
page->fileh = fileh;
page->f_pgoffset = pagen;
/* remember page in fileh->pagemap[pagen] */
pagemap_set(&fileh->pagemap, pagen, page);
}
/* (5) if page was not yet loaded - load it */
// XXX protect from concurrent loading of the same page (should be ok with mutex)
if (page->state < PAGE_LOADED) {
/* NOTE if we load data in-place, there would be a race with concurrent
* access to the page here - after first enabling memory-access to
* the page, other threads could end up reading corrupt data, while
* loading had not finished.
*
* so to avoid it we first load data to separate memory address, then
* mmap-duplicate that page into here, but it is more work compared to
* what kernel internally does.
*
* TODO try to use remap_anon_pages() when it is ready
* (but unfortunately it is only for anonymous memory)
* NOTE remap_file_pages() is going away...
*/
blk_t blk;
void *pageram;
int err;
/*
* if pagesize < blksize - need to prepare several adjacent pages for blk;
* if pagesize > blksize - will need to either 1) rescan which blk got
* dirty, or 2) store not-even-touched blocks adjacent to modified one.
*/
TODO (fileh->file->blksize != page_size(page));
// FIXME doing this mmap-to-temp/unmap is somewhat costly. Better
// constantly have whole RAM mapping somewhere R/W and load there.
// (XXX but then we'll either have
// - VMA fragmented (if we manage whole RAM as 1 file of physram size),
// - or need to waste a lot of address space (size of each ramh can be very large)
//
// generally this way it also has major problems)
//
// Also this way, we btw don't need to require python code to drop all
// references to loading buf.
/* mmap page memory temporarily somewhere
* XXX better pre-map all ram pages r/w in another area to not need to mmap/unmap it here
* -> will run slightly faster (but major slowdown is in clear_page in kernel)
*/
// TODO MAP_UNINITIALIZED somehow? (we'll overwrite that memory)
pageram = page_mmap(page, NULL, PROT_READ | PROT_WRITE);
TODO(!pageram); // XXX err
/* loadblk() -> pageram memory */
// XXX locking, vs gil?
blk = page->f_pgoffset; // NOTE because blksize = pagesize
err = fileh->file->file_ops->loadblk(fileh->file, blk, pageram);
/* TODO on error -> try to throw exception somehow to the caller, so
* that it can abort current transaction, but not die.
*
* NOTE for analogue situation when read for mmaped file fails, the
* kernel sends SIGBUS
*/
TODO (err);
xmunmap(pageram, page_size(page));
page->state = PAGE_LOADED;
}
/* (6) page data ready. Mmap it atomically into vma address space, or mprotect
* appropriately if it was already mmaped. */
int prot = PROT_READ;
PageState newstate = PAGE_LOADED;
if (write || page->state == PAGE_DIRTY) {
prot |= PROT_WRITE;
newstate = PAGE_DIRTY;
}
if (!bitmap_test_bit(vma->page_ismappedv, page->f_pgoffset - vma->f_pgoffset)) {
// XXX err
page_mmap(page, vma_page_addr(vma, page), prot);
bitmap_set_bit(vma->page_ismappedv, page->f_pgoffset - vma->f_pgoffset);
page_incref(page);
}
else {
/* just changing protection bits should not fail, if parameters ok */
xmprotect(vma_page_addr(vma, page), page_size(page), prot);
}
// XXX also call page->markdirty() ?
page->state = max(page->state, newstate);
if (page->state == PAGE_DIRTY)
fileh->dirty = 1;
/* mark page as used recently */
// XXX = list_move_tail()
list_del(&page->lru);
list_add_tail(&page->lru, &page->ramh->ram->lru_list);
/*
* (7) access to page prepared - now it is ok to return from signal handler
* - the caller will re-try executing faulting instruction.
*/
return;
}
/***********
* RECLAIM *
***********/
#define RECLAIM_BATCH 64 /* how many pages to reclaim at once */
int ram_reclaim(RAM *ram)
{
struct list_head *lru_list = &ram->lru_list;
struct list_head *hlru;
Page *page;
int batch = RECLAIM_BATCH, scanned = 0;
TRACE("RAM_RECLAIM\n");
hlru = lru_list->next;
while (batch && hlru != lru_list) {
page = list_entry(hlru, typeof(*page), lru);
hlru = hlru->next;
scanned++;
/* can release ram only from loaded non-dirty pages */
if (page->state == PAGE_LOADED) {
page_drop_memory(page);
batch--;
}
/* PAGE_EMPTY pages without mappers go away */
if (page->state == PAGE_EMPTY) {
BUG_ON(page->refcnt != 0); // XXX what for then we have refcnt? -> vs discard
/* delete page & its entry in fileh->pagemap */
pagemap_del(&page->fileh->pagemap, page->f_pgoffset);
list_del(&page->lru);
bzero(page, sizeof(*page)); /* just in case */
free(page);
}
}
TRACE("\t-> reclaimed %i scanned %i\n", RECLAIM_BATCH - batch, scanned);
return RECLAIM_BATCH - batch;
}
/********************
 * Internal helpers *
 ********************/
@@ -91,3 +626,99 @@ void *page_mmap(Page *page, void *addr, int prot)
// XXX better call ramh_mmap_page() without tinkering with ramh_ops?
return ramh->ramh_ops->mmap_page(ramh, page->ramh_pgoffset, addr, prot);
}
static void page_drop_memory(Page *page)
{
/* Memory for this page goes out. 1) unmap it from all mmaps */
struct list_head *hmmap;
if (page->state == PAGE_EMPTY)
return;
list_for_each(hmmap, &page->fileh->mmaps) {
VMA *vma = list_entry(hmmap, typeof(*vma), same_fileh);
vma_page_ensure_unmapped(vma, page);
}
/* 2) release memory to ram */
ramh_drop_memory(page->ramh, page->ramh_pgoffset);
page->state = PAGE_EMPTY;
// XXX touch lru?
}
/* vma: page -> addr where it should-be mmaped in vma */
static void *vma_page_addr(VMA *vma, Page *page)
{
uintptr_t addr;
ASSERT(vma->fileh == page->fileh); // XXX needed here?
addr = vma->addr_start + (page->f_pgoffset - vma->f_pgoffset) * page_size(page);
ASSERT(vma->addr_start <= addr &&
addr < vma->addr_stop);
return (void *)addr;
}
/* vma: addr -> fileh pgoffset with page containing addr */
static pgoff_t vma_addr_fpgoffset(VMA *vma, uintptr_t addr)
{
return vma->f_pgoffset + (addr - vma->addr_start) / vma->fileh->ramh->ram->pagesize;
}
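/* worked example for the two helpers above (illustrative numbers: a vma
 * created by fileh_mmap(vma, fh, 100, 4) with pagesize 4096, as in the tests):
 *
 *   f_pgoffset = 100, addr = addr_start + 1*4096 + 7
 *   vma_addr_fpgoffset(vma, addr) = 100 + (1*4096 + 7)/4096 = 101
 *   vma_page_addr(vma, page101)   = addr_start + (101 - 100)*4096
 *
 * i.e. addr falls into fileh page 101, whose mapping starts one page after
 * vma start. */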
/* is `page` mapped to `vma` */
static int vma_page_ismapped(VMA *vma, Page *page)
{
pgoff_t vma_fpgstop;
ASSERT(vma->fileh == page->fileh);
vma_fpgstop = vma_addr_fpgoffset(vma, vma->addr_stop);
if (!(vma->f_pgoffset <= page->f_pgoffset &&
page->f_pgoffset < vma_fpgstop))
return 0;
return bitmap_test_bit(vma->page_ismappedv, page->f_pgoffset - vma->f_pgoffset);
}
/* ensure `page` is not mapped to `vma` */
static void vma_page_ensure_unmapped(VMA *vma, Page *page)
{
if (!vma_page_ismapped(vma, page))
return;
/* mmap empty PROT_NONE address space instead of page memory */
mem_xvalloc(vma_page_addr(vma, page), page_size(page));
bitmap_clear_bit(vma->page_ismappedv, page->f_pgoffset - vma->f_pgoffset);
page_decref(page);
}
/* ensure `page` is not mapped RW to `vma`
*
* if mapped -> should be mapped as R
* if not mapped - leave as is
*/
static void vma_page_ensure_notmappedrw(VMA *vma, Page *page)
{
if (!vma_page_ismapped(vma, page))
return;
/* just changing protection - should not fail */
// XXX PROT_READ always? (it could be mmaped with PROT_NONE before without
// first access) - then it should not be mapped in page_ismappedv -> ok.
xmprotect(vma_page_addr(vma, page), page_size(page), PROT_READ);
}
// XXX stub
void OOM(void)
{
BUG();
}
@@ -18,15 +18,62 @@
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
*
* ~~~~~~~~
*
* Virtual memory connects BigFile content and RAM pages into file memory
* mappings.
*
* Read access to mapped pages causes their on-demand loading, and write access
* marks modified pages as dirty. Dirty pages can then on request be either
* written back to file or discarded.
*/
#include <stdint.h>
#include <wendelin/list.h>
#include <wendelin/bigfile/types.h>
#include <wendelin/bigfile/pagemap.h>
#include <ccan/bitmap/bitmap.h> // XXX can't forward-decl for bitmap
typedef struct RAM RAM;
typedef struct RAMH RAMH;
typedef struct Page Page;
typedef struct BigFile BigFile;
/* BigFile Handle
*
* BigFile handle is a representation of file snapshot that could be locally
* modified in-memory. The changes could be later either discarded or stored
* back to file. One file can have many opened handles each with its own
* modifications and optionally ram.
*/
struct BigFileH {
BigFile *file;
/* ram handle, backing this fileh mappings */
RAMH *ramh;
/* fileh mappings (list of VMA)
* NOTE current design assumes there will not be many mappings
* so instead of backpointers from pages to vma mapping entries, we'll
* scan all page->fileh->mmaps to overlap with page.
*/
struct list_head mmaps; /* _ -> vma->same_fileh */
/* {} f_pgoffset -> page */
PageMap pagemap;
// XXX not sure we need this
// -> currently is used to know whether to join ZODB DataManager serving ZBigFile
// XXX maybe change into dirty_list in the future?
unsigned dirty : 1;
};
typedef struct BigFileH BigFileH;
-/* Page - describes fixed-size item of physical RAM associated with content from file */
+/* Page - describes fixed-size item of physical RAM associated with content from fileh */
enum PageState {
PAGE_EMPTY = 0, /* file content has not been loaded yet */
PAGE_LOADED = 1, /* file content has been loaded and was not modified */
@@ -37,6 +84,10 @@ typedef enum PageState PageState;
struct Page {
PageState state;
/* wrt fileh - associated with */
BigFileH *fileh;
pgoff_t f_pgoffset;
/* wrt ram - associated with */
RAMH* ramh;
pgoff_t ramh_pgoffset;
@@ -49,6 +100,146 @@ struct Page {
typedef struct Page Page;
/* VMA - virtual memory area representing one fileh mapping
*
* NOTE areas may not overlap in virtual address space
* (in file space they can overlap).
*/
typedef struct VMA VMA;
struct VMA {
uintptr_t addr_start, addr_stop; /* [addr_start, addr_stop) */
BigFileH *fileh; /* for which fileh */
pgoff_t f_pgoffset; /* where starts, in pages */
/* FIXME For approximation 0, VMA(s) are kept in sorted doubly-linked
* list, which is not good for lookup/add/remove performance O(n), but easy to
* program. This should be ok for first draft, as there are not many fileh
* views taken simultaneously.
*
* TODO for better performance, some binary-search-tree should be used.
*/
struct list_head virt_list; /* (virtmem.c::vma_list -> _) */
/* VMA's for the same fileh (fileh->mmaps -> _) */
struct list_head same_fileh;
/* whether the page for file pgoffset = f_pgoffset + i is mapped in this VMA (bit i) */
bitmap *page_ismappedv; /* len ~ Δaddr / pagesize */
};
/*****************************
* API for clients *
*****************************/
/* open handle for a BigFile
*
* @fileh[out] BigFileH handle to initialize for this open
* @file
* @ram RAM that will back created fileh mappings
*
* @return 0 - ok, !0 - fail
*/
int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram);
/* close fileh
*
* it's an error to call fileh_close with existing mappings
*/
void fileh_close(BigFileH *fileh);
/* map fileh part into memory
*
* This "maps" fileh part [pgoffset, pglen) in pages into process address space.
*
* @vma[out] vma to initialize for this mmap
* @return 0 - ok, !0 - fail
*/
int fileh_mmap(VMA *vma, BigFileH *fileh, pgoff_t pgoffset, pgoff_t pglen);
/* unmap mapping created by fileh_mmap()
*
* This removes mapping created by fileh_mmap() from process address space.
* Changes made to fileh pages are preserved (e.g. they remain visible via
* other mappings and can later be committed or discarded).
*/
void vma_unmap(VMA *vma);
/* what to do at writeout */
enum WriteoutFlags {
/* store dirty pages back to file
*
* - call file.storeblk() for all dirty pages;
* - pages state remains PAGE_DIRTY.
*
* to "finish" the storage use WRITEOUT_MARKSTORED in the same or separate
* call.
*/
WRITEOUT_STORE = 1 << 0,
/* mark dirty pages as stored to file ok
*
* pages state becomes PAGE_LOADED and all mmaps are updated to map pages as
* R/O to track further writes.
*/
WRITEOUT_MARKSTORED = 1 << 1,
};
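/* usage sketch (mirrors the tests in bigfile/tests/test_virtmem.c): store and
 * mark can be done in one call, or split so that pages are marked clean only
 * after e.g. the enclosing transaction is known to have committed:
 *
 *      fileh_dirty_writeout(fh, WRITEOUT_STORE);      // storeblk() called;
 *                                                     // pages stay PAGE_DIRTY
 *      // ... once the store is known to be successful ...
 *      fileh_dirty_writeout(fh, WRITEOUT_MARKSTORED); // pages -> PAGE_LOADED;
 *                                                     // mmaps go RW -> R
 */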
/* write changes made to fileh memory back to file
*
* Perform write-related actions according to flags (see WriteoutFlags).
*
* @return 0 - ok, !0 - fail
* NOTE WRITEOUT_MARKSTORED alone can not fail.
*
* No guarantee is made about atomicity - e.g. if this call fails, some
* pages could be written and some left in memory in dirty state.
*/
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);
/* discard changes made to fileh memory
*
* For each fileh dirty page:
*
* - it is unmapped from all mmaps;
* - its content is discarded;
* - its backing memory is released to OS.
*/
void fileh_dirty_discard(BigFileH *fileh);
/* pagefault handler
*
* serves read/write access to protected memory: loads data from file on demand
* and tracks which pages were made dirty.
*
* (clients call this indirectly via triggering SIGSEGV on read/write to memory)
*/
void vma_on_pagefault(VMA *vma, uintptr_t addr, int write);
int pagefault_init(void); /* in pagefault.c */
/* release some non-dirty ram back to OS; re-protect affected mappings to PROT_NONE
 *
 * This should be called when the system is low on memory - it will scan through
 * RAM pages and release the memory of some LRU non-dirty pages back to the OS.
*
* (this is usually done automatically under memory pressure)
*
* @return how many RAM pages were reclaimed
* XXX int -> size_t ?
*/
int ram_reclaim(RAM *ram);
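/* Illustration (not part of the patch): a sketch of how allocation code could
 * retry under memory pressure - reclaim some pages and try again, giving up
 * via the OOM() hook (declared in the internal section below) only when
 * nothing more could be reclaimed. demo_alloc itself is hypothetical. */
static pgoff_t demo_alloc(RAM *ram, RAMH *ramh, pgoff_t pgoffset_hint)
{
    pgoff_t pgoff;

    while (1) {
        pgoff = ramh->ramh_ops->alloc_page(ramh, pgoffset_hint);
        if (pgoff != RAMH_PGOFF_ALLOCFAIL)
            return pgoff;

        /* allocation failed - try to release some non-dirty pages */
        if (!ram_reclaim(ram))
            OOM();      /* nothing was reclaimed - we are out of memory */
    }
}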
/************
 * Internal *
 ************/
...@@ -69,8 +260,18 @@ void page_incref(Page *page);
void page_decref(Page *page);
/* lookup VMA by addr */
VMA *virt_lookup_vma(void *addr);
void virt_register_vma(VMA *vma);
void virt_unregister_vma(VMA *vma);
/* allocate virtual memory address space */
void *mem_valloc(void *addr, size_t len);
void *mem_xvalloc(void *addr, size_t len);
// XXX is this needed? think more
/* what happens on out-of-memory */
void OOM(void);
#endif
/* Demo program, that shows 2 memory pages can be combined into 1 bigger
* _contiguous_ memory area via shm / mmap. The idea is that this way we'll
* combine array pages into larger slice on client __getslice__ requests and
* the result would be usual contiguous ndarray while pages of it could live in
* different places in memory.
*
* Unfortunately there is no way to mmap-duplicate pages for MAP_ANONYMOUS, so
* the way it is done is via a file in tmpfs (on /dev/shm/ via posix shm):
*
* https://groups.google.com/forum/#!topic/comp.os.linux.development.system/Prx7ExCzsv4
*/
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#define TRACE(msg, ...) do { \
fprintf(stderr, msg, ##__VA_ARGS__); \
fprintf(stderr, "\n"); \
} while (0)
void die(const char *msg)
{
perror(msg);
exit(1);
}
int main()
{
uint8_t *page1, *page2, *page12, *p;
size_t len;
int f, err;
len = 1*4096; /* XXX = 1 page */
/* TODO - choose name a-la mktemp and loop changing it if EEXIST */
f = shm_open("/array", O_RDWR | O_CREAT | O_EXCL,
S_IRUSR | S_IWUSR);
if (f < 0)
die("shm_open");
/*
* unlink so that the file is removed once only memory mapping(s) are left.
* All mappings will be released upon program exit, and so the memory
* resources will be released too.
*/
err = shm_unlink("/array");
if (err)
perror("shm_unlink");
/* whole memory-segment size */
err = ftruncate(f, len);
if (err < 0)
die("ftruncate");
/* page1 - memory view onto array page[0] */
page1 = mmap(/*addr=*/NULL, len,
PROT_READ | PROT_WRITE,
MAP_SHARED, // | MAP_HUGETLB | MAP_UNINITIALIZED ?
f, 0);
if (page1 == MAP_FAILED)
die("mmap page1");
TRACE("mmap page1 ok");
page1[0] = 1;
TRACE("store page1 ok (%i)", page1[0]);
/* page2 - memory view onto array page[0] (content should be identical to page1) */
page2 = mmap(/*addr=*/NULL, len,
PROT_READ | PROT_WRITE,
MAP_SHARED, // | MAP_HUGETLB | MAP_UNINITIALIZED ?
f, 0);
if (page2 == MAP_FAILED)
die("mmap page2");
TRACE("mmap page2 ok (%i)", page2[0]);
assert(page2[0] == 1);
/* alloc 2*page contiguous VMA */
page12 = mmap(NULL, 2*len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (page12 == MAP_FAILED)
die("mmap page12");
TRACE("stub page12 ok");
/* page12[0] -> array.page[0] */
p = mmap(&page12[0*len], len, PROT_READ, MAP_SHARED | MAP_FIXED, f, 0);
if (p == MAP_FAILED || (p != &page12[0*len]))
die("mmap page12.0");
/* page12[1] -> array.page[0] */
p = mmap(&page12[1*len], len, PROT_READ, MAP_SHARED | MAP_FIXED, f, 0);
if (p == MAP_FAILED || (p != &page12[1*len]))
die("mmap page12.1");
TRACE("page12 ok (%i %i)", page12[0], page12[len]);
assert(page12[0] == 1);
assert(page12[len] == 1);
page1[0] = 33;
TRACE("page12 ok (%i %i)", page12[0], page12[len]);
assert(page12[0] == 33);
assert(page12[len] == 33);
page2[0] = 45;
TRACE("page12 ok (%i %i)", page12[0], page12[len]);
assert(page12[0] == 45);
assert(page12[len] == 45);
/* should segfault - we only requested PROT_READ */
TRACE("will segfault...");
page12[0] = 55;
return 0;
}
#include "t_utils.h"
#include <wendelin/utils.h>
static const struct ram_ops ram_limited_ops;
static const struct ramh_ops ramh_limited_ops;
RAMLimited *ram_limited_new(RAM *backend, size_t alloc_max)
{
RAMLimited *ram;
ram = zalloc(sizeof(*ram));
if (!ram)
return NULL;
ram->backend = backend;
ram->pagesize = backend->pagesize;
/* NOTE allocated pages will be linked here (instead of backend->lru_list)
* automatically, as upper code thinks _we_ allocated the page */
INIT_LIST_HEAD(&ram->lru_list);
ram->alloc_max = alloc_max;
ram->nalloc = 0;
ram->ram_ops = &ram_limited_ops;
return ram;
}
struct RAMHLimited {
RAMH;
RAMH *backend;
};
typedef struct RAMHLimited RAMHLimited;
size_t ram_limited_get_current_maxsize(RAM *ram0)
{
RAMLimited *ram = upcast(RAMLimited *, ram0);
return ram_get_current_maxsize(ram->backend);
}
RAMH *ram_limited_ramh_open(RAM *ram0)
{
RAMLimited *ram = upcast(RAMLimited *, ram0);
RAMHLimited *ramh;
ramh = zalloc(sizeof(*ramh));
if (!ramh)
goto out;
ramh->backend = ramh_open(ram->backend);
if (!ramh->backend)
goto out;
ramh->ram = ram;
ramh->ramh_ops = &ramh_limited_ops;
return ramh;
out:
free(ramh);
return NULL;
}
void ram_limited_close(RAM *ram0)
{
//RAMLimited *ram = upcast(RAMLimited *, ram0);
// XXX close if owning?
// ram_close(ram->backend);
// TODO free(self) ?
}
static const struct ram_ops ram_limited_ops = {
.get_current_maxsize = ram_limited_get_current_maxsize,
.ramh_open = ram_limited_ramh_open,
.close = ram_limited_close,
};
pgoff_t ramh_limited_alloc_page(RAMH *ramh0, pgoff_t pgoffset_hint)
{
RAMHLimited *ramh = upcast(RAMHLimited *, ramh0);
RAMLimited *ram = upcast(RAMLimited *, ramh->ram);
pgoff_t pgoff;
/* deny allocation when max #pages already allocated */
if (ram->nalloc >= ram->alloc_max)
return RAMH_PGOFF_ALLOCFAIL;
pgoff = ramh->backend->ramh_ops->alloc_page(ramh->backend, pgoffset_hint);
if (pgoff != RAMH_PGOFF_ALLOCFAIL)
ram->nalloc++;
return pgoff;
}
void ramh_limited_drop_memory(RAMH *ramh0, pgoff_t ramh_pgoffset)
{
RAMHLimited *ramh = upcast(RAMHLimited *, ramh0);
RAMLimited *ram = upcast(RAMLimited *, ramh->ram);
ramh->backend->ramh_ops->drop_memory(ramh->backend, ramh_pgoffset);
ram->nalloc--;
}
void *ramh_limited_mmap_page(RAMH *ramh0, pgoff_t ramh_pgoffset, void *addr, int prot)
{
RAMHLimited *ramh = upcast(RAMHLimited *, ramh0);
return ramh->backend->ramh_ops->mmap_page(ramh->backend, ramh_pgoffset, addr, prot);
}
void ramh_limited_close(RAMH *ramh0)
{
RAMHLimited *ramh = upcast(RAMHLimited *, ramh0);
ramh->backend->ramh_ops->close(ramh->backend);
// TODO free(self) ?
}
static const struct ramh_ops ramh_limited_ops = {
.alloc_page = ramh_limited_alloc_page,
.drop_memory = ramh_limited_drop_memory,
.mmap_page = ramh_limited_mmap_page,
.close = ramh_limited_close,
};
#ifndef _WENDELIN_TESTING_UTILS_H_
#define _WENDELIN_TESTING_UTILS_H_
/* Wendelin.bigfile | various testing utilities
* Copyright (C) 2014-2015 Nexedi SA and Contributors.
* Kirill Smelkov <kirr@nexedi.com>
*
* This program is free software: you can Use, Study, Modify and Redistribute
* it under the terms of the GNU General Public License version 3, or (at your
* option) any later version, as published by the Free Software Foundation.
*
* You can also Link and Combine this program with other software covered by
* the terms of any of the Open Source Initiative approved licenses and Convey
* the resulting work. Corresponding source of such a combination shall include
* the source code for all other software used.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See COPYING file for full licensing terms.
*/
#include <wendelin/bigfile/ram.h>
/* access to vma memory as byte[] and blk_t[] */
#define b(vma, idx) ( ((volatile uint8_t *)vma->addr_start) [ idx ] )
#define B(vma, idx) ( ((volatile blk_t *)vma->addr_start) [ idx ] )
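/* e.g. after fileh_mmap(vma, fileh, 0, 1), `b(vma, 0) = 7' writes the first
 * byte of the mapping (dirtying page 0), and B(vma, 0) reads the first blk_t */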
/* RAM with limit on #allocated pages
*
* NOTE allocated pages are linked to ->lru_list and backend->lru_list will be empty.
*/
struct RAMLimited {
RAM;
RAM *backend;
size_t alloc_max;
size_t nalloc;
};
typedef struct RAMLimited RAMLimited;
RAMLimited *ram_limited_new(RAM *backend, size_t alloc_max);
#endif
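/* Illustration (not part of the patch): how a test could use RAMLimited to
 * deterministically hit allocation failure. With alloc_max=1 the first page
 * allocation succeeds and the second one is denied, letting tests exercise
 * reclaim/OOM paths without creating real memory pressure. Assumes BUG_ON
 * from <wendelin/bug.h>. */
void demo_ram_limited(RAM *backend)
{
    RAMLimited *ram = ram_limited_new(backend, /*alloc_max=*/1);
    RAMH *ramh = ramh_open((RAM *)ram);
    pgoff_t p0, p1;

    p0 = ramh->ramh_ops->alloc_page(ramh, 0);   /* ok - 1 page allowed */
    p1 = ramh->ramh_ops->alloc_page(ramh, 1);   /* denied by the limit */
    BUG_ON(p0 == RAMH_PGOFF_ALLOCFAIL);
    BUG_ON(p1 != RAMH_PGOFF_ALLOCFAIL);

    ramh->ramh_ops->drop_memory(ramh, p0);
    ramh->ramh_ops->close(ramh);
}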