/* Wendelin.bigfile | shmfs (aka tmpfs) ram backend
 * Copyright (C) 2014-2015  Nexedi SA and Contributors.
 *                          Kirill Smelkov <kirr@nexedi.com>
 *
 * This program is free software: you can Use, Study, Modify and Redistribute
 * it under the terms of the GNU General Public License version 3, or (at your
 * option) any later version, as published by the Free Software Foundation.
 *
 * You can also Link and Combine this program with other software covered by
 * the terms of any of the Open Source Initiative approved licenses and Convey
 * the resulting work. Corresponding source of such a combination shall include
 * the source code for all other software used.
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
 *
 * ~~~~~~~~
 *
 * TODO description
 */

#include <wendelin/bigfile/ram.h>
#include <wendelin/utils.h>
#include <wendelin/bug.h>

#include <fcntl.h>
/* FIXME glibc in Debian before Jessie does not define FALLOC_FL_KEEP_SIZE and
 * FALLOC_FL_PUNCH_HOLE, even when kernel supports it
 * http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/falloc.h
 */
#ifndef FALLOC_FL_KEEP_SIZE
# define FALLOC_FL_KEEP_SIZE    0x01
#endif
#ifndef FALLOC_FL_PUNCH_HOLE
# define FALLOC_FL_PUNCH_HOLE   0x02
#endif

#include <unistd.h>
#include <sys/vfs.h>
#include <sys/mman.h>
#include <sys/types.h>


/* we'll manage RAM in "pages" of 2M
 *
 * Compared to std 4K pages, this will reduce per-page overhead and also
 * coincides with huge page size on x86/x86_64).
 *
 * Hardware pages will still be of usual 4K size - we'll just manage them in
 * 512-pages groups.
 */
#define SHMFS_PAGE_SIZE     (2*1024*1024ULL)


/* default prefix & ramh files template */
static const char shmfs_ram_prefix_default[] = "/dev/shm";
static const char shmfs_ramh_template[]      = "ramh.XXXXXX";


/* RAM on shmfs
 *
 * Extends the generic RAM base (anonymous member) with shmfs-specific state.
 * Created by shmfs_ram_new(). */
struct SHMFS_RAM {
    RAM;                /* generic RAM base - must come first */

    const char *prefix; /* prefix where to create ramh files
                           (shmfs mount point, e.g. "/dev/shm"); owned, xstrdup'ed */
};
typedef struct SHMFS_RAM SHMFS_RAM;


/* RAM Handle on shmfs
 *
 * Extends the generic RAMH base (anonymous member); each handle is backed by
 * one (unlinked) temporary file on the shmfs mount. */
struct SHMFS_RAMH {
    RAMH;

    int    ramh_fd;       /* fd of the backing temp file; -1 after close */
    size_t ramh_fpgsize;  /* current file size in pagesize units */
};
typedef struct SHMFS_RAMH SHMFS_RAMH;



/* mmap_page: map one ram page of ramh at page offset ramh_pgoffset.
 *
 * If addr is non-NULL the mapping is placed exactly there (MAP_FIXED);
 * otherwise the kernel chooses the address.
 *
 * Returns the mapped address, or NULL on mmap failure. */
static void *shmfs_mmap_page(RAMH *ramh0, pgoff_t ramh_pgoffset, void *addr, int prot)
{
    SHMFS_RAMH *ramh = upcast(SHMFS_RAMH *, ramh0);
    size_t pgsize = ramh->ram->pagesize;
    int flags = MAP_SHARED;

    if (addr)
        flags |= MAP_FIXED;

    /* NOTE MAP_POPULATE was tried (to avoid the later pagefault on first
     * access) but made this mmap - and the whole run - slower, so it is
     * deliberately not used. XXX why slower? */
    void *res = mmap(addr, pgsize, prot, flags, ramh->ramh_fd,
                     ramh_pgoffset * pgsize);

    return (res == MAP_FAILED) ? NULL : res;
}


pgoff_t shmfs_alloc_page(RAMH *ramh0, pgoff_t pgoffset_hint)
{
    // FIXME double calls with same pgoffset_hint ? (or move ->pagemap to ramh ?)
    SHMFS_RAMH *ramh = upcast(SHMFS_RAMH *, ramh0);
    pgoff_t ramh_pgoffset = pgoffset_hint;
    size_t pagesize = ramh->ram->pagesize;
    int err;

    /*
     * - allocate space for page at ramh_pgoffset,
     * - hole-grow file to size covering that page, if file was smaller,
     *
     * all in one go.
     *
     * We allocate filesystem space so that we know we really allocated that
     * memory now and that client code will not get SIGBUS on memory read/write
     * or EFAULT on syscalls read/write, when accessing it later.
     *
     * It is easier to handle ENOMEM synchronously.
     */
    err = fallocate(ramh->ramh_fd, 0 /* without KEEP_SIZE */,
            ramh_pgoffset * pagesize, pagesize);

    if (err)
        return RAMH_PGOFF_ALLOCFAIL;

    if (ramh_pgoffset >= ramh->ramh_fpgsize)
        ramh->ramh_fpgsize = ramh_pgoffset+1;

    return ramh_pgoffset;
}


/* drop_memory: give the OS memory backing page ramh_pgoffset back.
 *
 * The page must lie within the current file size (in pages); violating this
 * is a bug. */
static void shmfs_drop_memory(RAMH *ramh0, pgoff_t ramh_pgoffset)
{
    SHMFS_RAMH *ramh = upcast(SHMFS_RAMH *, ramh0);
    size_t pgsize = ramh->ram->pagesize;

    BUG_ON(ramh_pgoffset >= ramh->ramh_fpgsize);

    // XXX state -> empty ?

    /* Punch a hole over the page: PUNCH_HOLE deallocates the backing blocks
     * (releasing the memory to the OS) while KEEP_SIZE leaves the file length
     * intact. A failure here means something is seriously wrong, so the
     * x-wrapper is used - it is expected not to fail. */
    xfallocate(ramh->ramh_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
               ramh_pgoffset * pgsize, pgsize);
}


/* close: drop all memory of the handle and shut down its backing file.
 * NOTE the handle structure itself is not freed here (TODO free(self)?). */
static void shmfs_close(RAMH *ramh0)
{
    SHMFS_RAMH *h = upcast(SHMFS_RAMH *, ramh0);

    // XXX verify no mapping left?

    xftruncate(h->ramh_fd, 0);  /* drop all file blocks -> releases memory */
    xclose(h->ramh_fd);

    /* poison fields so accidental further use trips early */
    h->ramh_fd = -1;
    h->ramh_fpgsize = 0;
}


/* RAMH operations vtable for shmfs handles (set on every SHMFS_RAMH) */
static const struct ramh_ops shmfs_ramh_ops = {
    .alloc_page     = shmfs_alloc_page,
    .mmap_page      = shmfs_mmap_page,
    .drop_memory    = shmfs_drop_memory,
    .close          = shmfs_close,
};


/* get_current_maxsize: total capacity of the backing filesystem, in ram pages.
 *
 * Queried live via statfs(2) on ram->prefix, so the value may change if the
 * filesystem under prefix is remounted or resized. Aborts on statfs failure. */
static size_t shmfs_get_current_maxsize(RAM *ram0)
{
    SHMFS_RAM *ram = upcast(SHMFS_RAM *, ram0);
    struct statfs sf;

    // XXX races with fs remount/change under prefix
    if (statfs(ram->prefix, &sf) != 0)
        BUGe();

    return sf.f_blocks * sf.f_bsize / ram->pagesize;
}


/* ramh_open: create a new RAM handle backed by a temp file under ram->prefix.
 *
 * A file "<prefix>/ramh.XXXXXX" is created via mkstemp and, by default,
 * immediately unlinked, so its space is reclaimed automatically once the fd
 * is closed. Setting $WENDELIN_RAMH_HIDE to a value not starting with 'y'
 * leaves the file visible for debugging.
 *
 * Returns the new handle (caller owns it; see shmfs_close), or NULL on
 * allocation / file-creation failure. */
static RAMH *shmfs_ramh_open(RAM *ram0)
{
    SHMFS_RAM *ram = upcast(SHMFS_RAM *, ram0);
    SHMFS_RAMH *ramh;
    char *s,   *ramh_filename = NULL;
    int err;

    ramh = zalloc(sizeof(*ramh));
    if (!ramh)
        goto out;

    ramh->ramh_ops = &shmfs_ramh_ops;
    ramh->ram      = ram;

    /* build "<prefix>/ramh.XXXXXX" for mkstemp */
    ramh_filename = malloc(strlen(ram->prefix) + 1/*"/"*/ +
                           strlen(shmfs_ramh_template) + 1/*NUL*/);
    if (!ramh_filename)
        goto out;

    s = ramh_filename;
    s = stpcpy(s, ram->prefix);
    s = stpcpy(s, "/");
    s = stpcpy(s, shmfs_ramh_template);

    ramh->ramh_fd = mkstemp(ramh_filename);
    if (ramh->ramh_fd == -1)
        goto out;

    // XXX maybe by default show and unlink atexit / on close
    /* unlink ramh file, if not asked to leave it shown for debugging */
    s = getenv("WENDELIN_RAMH_HIDE");
    if (!s || s[0] == 'y') {    /* simplified: `s &&` was redundant after `!s ||` */
        err = unlink(ramh_filename);
        if (err)
            BUGe();
    }

    free(ramh_filename);

    ramh->ramh_fpgsize = 0;
    return ramh;

out:
    /* free(NULL) is a no-op, so both partial-failure paths are safe here */
    free(ramh);
    free(ramh_filename);
    return NULL;
}


/* RAM operations vtable for shmfs ram objects (set on every SHMFS_RAM) */
static const struct ram_ops shmfs_ram_ops = {
    .get_current_maxsize    = shmfs_get_current_maxsize,
    .ramh_open              = shmfs_ramh_open,
    //.close    = shmfs_ram_dtor
};


/* shmfs ram type: create a RAM object managing memory via shmfs.
 *
 * arg is the shmfs mount point to use as prefix, or NULL to use the default
 * ("/dev/shm"). Out-of-memory aborts via the x-allocators. */
static RAM *shmfs_ram_new(const char *arg)
{
    SHMFS_RAM *ram = xzalloc(sizeof(*ram));

    ram->ram_ops   = &shmfs_ram_ops;
    ram->pagesize  = SHMFS_PAGE_SIZE;
    INIT_LIST_HEAD(&ram->lru_list);

    // TODO ensure prefix points to somewhere on shmfs
    ram->prefix    = xstrdup(arg ?: shmfs_ram_prefix_default);

    return ram;
}   /* NOTE removed stray `;` after function body - invalid in strict C */


// TODO shmfs_ram_dtor


/* ram-type descriptor registered with the bigfile ram machinery under "shmfs" */
static const struct ram_type shmfs_ram_type = {
    .name     = "shmfs",
    .ram_new  = shmfs_ram_new,
};



__attribute__((constructor))
static void  shmfs_init(void)
{
    ram_register_type(&shmfs_ram_type);
}