include/wendelin/bigfile/virtmem.h

#ifndef _WENDELIN_BIGFILE_VIRTMEM_H_
#define _WENDELIN_BIGFILE_VIRTMEM_H_

/* Wendelin.bigfile | Virtual memory
 * Copyright (C) 2014-2015  Nexedi SA and Contributors.
 *                          Kirill Smelkov <kirr@nexedi.com>
 *
 * This program is free software: you can Use, Study, Modify and Redistribute
 * it under the terms of the GNU General Public License version 3, or (at your
 * option) any later version, as published by the Free Software Foundation.
 *
 * You can also Link and Combine this program with other software covered by
 * the terms of any of the Free Software licenses or any of the Open Source
 * Initiative approved licenses and Convey the resulting work. Corresponding
 * source of such a combination shall include the source code for all other
 * software used.
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See COPYING file for full licensing terms.
 * See https://www.nexedi.com/licensing for rationale and options.
 *
 * ~~~~~~~~
 *
 * Virtual memory connects BigFile content and RAM pages into file memory
 * mappings.
 *
 * Read access to mapped pages causes their on-demand loading, and write access
 * marks modified pages as dirty. Dirty pages can then, on request, be either
 * written back to the file or discarded.
 */

#include <stdint.h>
#include <wendelin/list.h>
#include <wendelin/bigfile/types.h>
#include <wendelin/bigfile/pagemap.h>
#include <ccan/bitmap/bitmap.h> // XXX can't forward-decl for bitmap

typedef struct RAM RAM;
typedef struct RAMH RAMH;
typedef struct Page Page;
typedef struct BigFile BigFile;


/* BigFile Handle
 *
 * A BigFile handle represents a snapshot of a file that can be modified
 * locally in memory. The changes can later be either discarded or stored back
 * to the file. One file can have many open handles, each with its own
 * modifications and, optionally, its own RAM.
 */
struct BigFileH {
    BigFile *file;

    /* ram handle, backing this fileh mappings */
    RAMH    *ramh;

    /* fileh mappings (list of VMA)
     * NOTE the current design assumes there will not be many mappings, so
     *      instead of keeping backpointers from pages to VMA mapping entries,
     *      we scan all page->fileh->mmaps for overlap with the page.
     */
    struct list_head mmaps; /* _ -> vma->same_fileh */

    /* {} f_pgoffset -> page */
    PageMap     pagemap;


    /* fileh dirty pages */
    struct list_head dirty_pages;   /* _ -> page->in_dirty */

    /* whether writeout is currently in progress */
    int writeout_inprogress;
};
typedef struct BigFileH BigFileH;


/* Page - describes a fixed-size item of physical RAM associated with content from a fileh */
enum PageState {
    PAGE_EMPTY      = 0, /* file content has not been loaded yet */
    PAGE_LOADING    = 1, /* file content              loading is  in progress */
    PAGE_LOADING_INVALIDATED
                    = 2, /* file content              loading was in progress
                            while request to invalidate the page came in */
    PAGE_LOADED     = 3, /* file content has     been loaded and was not modified */
    PAGE_DIRTY      = 4, /* file content has     been loaded and was     modified */
};
typedef enum PageState PageState;

struct Page {
    PageState   state;

    /* wrt fileh - associated with */
    BigFileH    *fileh;
    pgoff_t     f_pgoffset;

    /* wrt ram - associated with */
    RAMH*       ramh;
    pgoff_t     ramh_pgoffset;

    /* in recently-used pages for ramh->ram (ram->lru_list -> _) */
    struct list_head lru;

    /* in dirty pages for fileh (fileh->dirty_pages -> _) */
    struct list_head in_dirty;

    int     refcnt; /* each mapping in a vma counts here */
};
typedef struct Page Page;



/* VMA - virtual memory area representing one fileh mapping
 *
 * NOTE areas may not overlap in virtual address space
 *      (in file space they can overlap).
 */
typedef struct VMA VMA;
struct VMA {
    uintptr_t   addr_start, addr_stop;    /* [addr_start, addr_stop) */

    BigFileH    *fileh;         /* for which fileh */
    pgoff_t     f_pgoffset;     /* where starts, in pages */

    /* FIXME For approximation 0, VMAs are kept in a sorted doubly-linked
     * list, which gives poor O(n) lookup/add/remove performance but is easy to
     * program. This should be ok for a first draft, as there are not many fileh
     * views taken simultaneously.
     *
     * TODO for better performance, some binary-search-tree should be used.
     */
    struct list_head virt_list; /* (virtmem.c::vma_list -> _) */

    /* VMA's for the same fileh (fileh->mmaps -> _) */
    struct list_head same_fileh;

    /* whether the page corresponding to (pgoffset - f_pgoffset) is mapped in this VMA */
    bitmap      *page_ismappedv;    /* len ~ Δaddr / pagesize */
};
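
/* Example: checking whether a fileh page is currently mapped in a VMA (an
 * illustrative sketch; bitmap_test_bit() is the ccan/bitmap accessor):
 *
 *      pgoff_t pgoff_invma = page->f_pgoffset - vma->f_pgoffset;
 *      if (bitmap_test_bit(vma->page_ismappedv, pgoff_invma))
 *          ... the page is mapped somewhere in [addr_start, addr_stop) ...
 */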


/*****************************
 *      API for clients      *
 *****************************/

/* open handle for a BigFile
 *
 * @fileh[out]  BigFileH handle to initialize for this open
 * @file
 * @ram         RAM that will back created fileh mappings
 *
 * @return  0 - ok, !0 - fail
 */
int fileh_open(BigFileH *fileh, BigFile *file, RAM *ram);


/* close fileh
 *
 * it's an error to call fileh_close with existing mappings
 * it's an error to call fileh_close while writeout for fileh is in progress
 */
void fileh_close(BigFileH *fileh);


/* map fileh part into memory
 *
 * This "maps" fileh part [pgoffset, pglen) in pages into process address space.
 *
 * @vma[out]    vma to initialize for this mmap
 * @return      0 - ok, !0 - fail
 */
int fileh_mmap(VMA *vma, BigFileH *fileh, pgoff_t pgoffset, pgoff_t pglen);
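
/* Example: typical client flow (an illustrative sketch, not a normative part
 * of this header; `file` and `ram` are assumed to be set up elsewhere by a
 * concrete BigFile implementation and RAM backend, and pagefault_init() is
 * assumed to have installed the SIGSEGV handler):
 *
 *      BigFileH fh;
 *      VMA      vma;
 *      uint8_t *p;
 *
 *      if (fileh_open(&fh, file, ram))
 *          return -1;
 *      if (fileh_mmap(&vma, &fh, 0, 4))        // map file pages [0, 4)
 *          return -1;
 *
 *      p = (uint8_t *)vma.addr_start;
 *      if (p[0] == 0)                          // read  -> on-demand page load
 *          p[0] = 1;                           // write -> page becomes dirty
 *
 *      fileh_dirty_writeout(&fh, WRITEOUT_STORE | WRITEOUT_MARKSTORED);
 *
 *      vma_unmap(&vma);
 *      fileh_close(&fh);
 */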


/* unmap mapping created by fileh_mmap()
 *
 * This removes mapping created by fileh_mmap() from process address space.
 * Changes made to fileh pages are preserved (e.g. they remain visible through
 * other mappings and can later be committed or discarded).
 */
void vma_unmap(VMA *vma);


/* what to do at writeout */
enum WriteoutFlags {
    /* store dirty pages back to file
     *
     * - call file.storeblk() for all dirty pages;
     * - page state remains PAGE_DIRTY.
     *
     * to "finish" the storage use WRITEOUT_MARKSTORED in the same or separate
     * call.
     */
    WRITEOUT_STORE          = 1 << 0,

    /* mark dirty pages as successfully stored to file
     *
     * page state becomes PAGE_LOADED and all mmaps are updated to map the pages
     * as R/O so that further writes are tracked.
     */
    WRITEOUT_MARKSTORED     = 1 << 1,
};

/* write changes made to fileh memory back to file
 *
 * Perform write-related actions according to flags (see WriteoutFlags).
 *
 * @return  0 - ok      !0 - fail
 *          NOTE a WRITEOUT_MARKSTORED-only call cannot fail.
 *
 * No guarantee is made about atomicity - e.g. if this call fails, some
 * pages could be written and some left in memory in dirty state.
 *
 * it's an error to run several fileh_dirty_writeout() calls in parallel for
 * the same fileh.
 *
 * it's an error to modify fileh pages while writeout is in progress, i.e.
 * until fileh_dirty_writeout(... | WRITEOUT_STORE) has finished.
 */
int fileh_dirty_writeout(BigFileH *fileh, enum WriteoutFlags flags);
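
/* Example: committing dirty pages in one pass (an illustrative sketch):
 *
 *      if (fileh_dirty_writeout(&fh, WRITEOUT_STORE | WRITEOUT_MARKSTORED))
 *          return -1;      // not atomic: some pages may already be stored
 *
 * A two-phase commit is also possible: first call with only WRITEOUT_STORE
 * (pages stay PAGE_DIRTY), then, once the storage layer confirms the data is
 * safe, call again with only WRITEOUT_MARKSTORED; to abort instead, use
 * fileh_dirty_discard().
 */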


/* discard changes made to fileh memory
 *
 * For each fileh dirty page:
 *
 *   - it is unmapped from all mmaps;
 *   - its content is discarded;
 *   - its backing memory is released to OS.
 *
 * it's an error for a given fileh to call fileh_dirty_discard() while writeout
 * is in progress.
 */
void fileh_dirty_discard(BigFileH *fileh);



/* invalidate fileh page
 *
 * Make sure that the page corresponding to pgoffset is not present in fileh memory.
 *
 * The page may be in dirty, loaded, loading, or empty state. In all cases the
 * page transitions to the empty state and its memory is forgotten.
 *
 * ( Such invalidation is needed to synchronize fileh memory when we know the
 *   file was changed externally )
 *
 * it's an error to call fileh_invalidate_page() while writeout for fileh is in
 * progress.
 */
void fileh_invalidate_page(BigFileH *fileh, pgoff_t pgoffset);
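
/* Example: reacting to an external change covering file pages [pg_lo, pg_hi)
 * (an illustrative sketch; pg_lo and pg_hi are assumed to be computed by the
 * caller from the changed file range):
 *
 *      pgoff_t pg;
 *      for (pg = pg_lo; pg < pg_hi; pg++)
 *          fileh_invalidate_page(&fh, pg);
 */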


/* pagefault handler
 *
 * serves read/write access to protected memory: loads data from file on demand
 * and tracks which pages were made dirty.
 *
 * (clients call this indirectly via triggering SIGSEGV on read/write to memory)
 */
enum VMFaultResult {
    VM_HANDLED  = 0, /* pagefault handled */
    VM_RETRY    = 1, /* pagefault handled partly - handling has to be retried */
};
typedef enum VMFaultResult VMFaultResult;
VMFaultResult vma_on_pagefault(VMA *vma, uintptr_t addr, int write);
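
/* Example: one way a SIGSEGV handler could drive this (an illustrative sketch;
 * the actual handler lives in pagefault.c, and `addr`/`write` are assumed to
 * come from the fault context):
 *
 *      VMA *vma = virt_lookup_vma(addr);
 *      if (vma) {
 *          VMFaultResult res;
 *          do {
 *              res = vma_on_pagefault(vma, (uintptr_t)addr, write);
 *          } while (res == VM_RETRY);
 *      }
 */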
int pagefault_init(void);   /* in pagefault.c */


/* release some non-dirty ram back to OS; protect PROT_NONE related mappings
 *
 * This should be called when the system is low on memory - it scans through
 * RAM pages and releases the memory of some least-recently-used non-dirty
 * pages back to the OS.
 *
 * (this is usually done automatically under memory pressure)
 *
 * @return  how many RAM pages were reclaimed
 * XXX int -> size_t ?
 */
int ram_reclaim(RAM *ram);
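
/* Example: shrinking RAM usage when the system runs low on memory (an
 * illustrative sketch; low_on_memory() is a hypothetical predicate and `ram`
 * is the RAM pool in use):
 *
 *      while (low_on_memory())
 *          if (!ram_reclaim(ram))
 *              break;      // nothing more to reclaim (e.g. all pages dirty)
 */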



/************
 * Internal *
 ************/

/* mmap page memory into address space
 *
 * @addr     NULL - mmap somewhere,    !NULL - mmap exactly there (MAP_FIXED)
 * @return  !NULL - mmapped ok there,   NULL - error
 *
 * NOTE to unmap memory either
 *
 *      - use usual munmap(2), or
 *      - mmap(2) something else in place of the mmapped page memory.
 */
void *page_mmap(Page *page, void *addr, int prot);
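
/* Example: temporarily mapping a page read-only and unmapping it again (an
 * illustrative sketch; `pagesize` stands for the page size of the backing RAM):
 *
 *      void *p = page_mmap(page, NULL, PROT_READ);
 *      if (p) {
 *          ... read page content via p ...
 *          munmap(p, pagesize);
 *      }
 */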

void page_incref(Page *page);
void page_decref(Page *page);


/* lookup VMA by addr */
VMA *virt_lookup_vma(void *addr);
void virt_register_vma(VMA *vma);
void virt_unregister_vma(VMA *vma);

/* allocate virtual memory address space */
void *mem_valloc(void *addr, size_t len);
void *mem_xvalloc(void *addr, size_t len);

/* big virtmem lock */
void virt_lock(void);
void virt_unlock(void);

/* for third-party code to hook into the process of taking the big virtmem lock
 * (e.g. for Python to hook in its GIL release/reacquire)  */
struct VirtGilHooks {
    /* drop gil, if current thread holds it */
    void *  (*gil_ensure_unlocked)      (void);
    /* retake gil, if we were holding it at the ->gil_ensure_unlocked() stage */
    void    (*gil_retake_if_waslocked)  (void *);
};
typedef struct VirtGilHooks VirtGilHooks;

void virt_lock_hookgil(const VirtGilHooks *gilhooks);
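
/* Example: hooking in Python's GIL (an illustrative sketch; the my_gil_*
 * functions are hypothetical wrappers around PyEval_SaveThread() /
 * PyEval_RestoreThread() or similar):
 *
 *      static const VirtGilHooks my_gilhooks = {
 *          .gil_ensure_unlocked     = my_gil_ensure_unlocked,
 *          .gil_retake_if_waslocked = my_gil_retake_if_waslocked,
 *      };
 *
 *      virt_lock_hookgil(&my_gilhooks);
 */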


// XXX is this needed? think more
/* what happens on out-of-memory */
void OOM(void);


#endif