/**
    A small malloc implementation for use in WebAssembly targets

    Copyright (c) 2023-2025, Kitsunebi Games
    Copyright (c) 2023-2025, Inochi2D Project
    Copyright (c) 2020, Igalia, S.L.

    Distributed under an MIT-style License.
    (See accompanying LICENSE file or copy at
    https://github.com/wingo/walloc/blob/master/LICENSE.md)
*/
module walloc;
import ldc.intrinsics :
    llvm_wasm_memory_grow,
    llvm_wasm_memory_size,
    llvm_memmove;

extern(C) @nogc nothrow:

void* malloc(size_t size) @nogc nothrow @system {
    if (size == 0)
        return null;

    size_t granules = size_to_granules(size);
    chunk_kind kind = granules_to_chunk_kind(granules);
    return (kind == chunk_kind.LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind);
}

export
void free(void *ptr) @nogc nothrow @system {
    if (!ptr) return;

    _page_t* page = get_page(ptr);
    size_t chunk = get_chunk_index(ptr);
    ubyte kind = page.header.chunk_kinds[chunk];
    if (kind == chunk_kind.LARGE_OBJECT) {
        _large_object_t* obj = get_large_object(ptr);
        obj.next = large_objects;
        large_objects = obj;
        allocate_chunk(page, chunk, chunk_kind.FREE_LARGE_OBJECT);
        pending_large_object_compact = 1;
    } else {
        size_t granules = kind;
        _freelist_t** loc = get_small_object_freelist(cast(chunk_kind)granules);
        _freelist_t* obj = cast(_freelist_t*)ptr;
        obj.next = *loc;
        *loc = obj;
    }
}

export
void* realloc(void* ptr, size_t newSize) @nogc nothrow @system {
    if (!ptr)
        return malloc(newSize);

    // The existing allocation already has enough capacity; keep it.
    size_t oldSize = get_alloc_size(ptr);
    if (newSize <= oldSize)
        return ptr;

    // The requested size is bigger; move the data into a fresh allocation.
    void* n_mem = malloc(newSize);
    if (n_mem) {
        llvm_memmove(n_mem, ptr, oldSize, true);
        free(ptr);
    }
    return n_mem;
}

private:

size_t get_alloc_size(void* ptr) {
    _page_t* page = get_page(ptr);
    size_t chunk = get_chunk_index(ptr);
    chunk_kind kind = cast(chunk_kind)page.header.chunk_kinds[chunk];

    if (kind == chunk_kind.LARGE_OBJECT) {
        _large_object_t* obj = get_large_object(ptr);
        return obj.size;
    }

    // Small objects store their chunk kind; map it back to a granule count.
    ptrdiff_t granules = chunk_kind_to_granules(kind);
    if (kind < chunk_kind.SMALL_OBJECT_CHUNK_KINDS)
        return granules * GRANULE_SIZE;

    return 0;
}

extern __gshared void* __heap_base;
__gshared size_t walloc_heap_size;
__gshared _freelist_t*[chunk_kind.SMALL_OBJECT_CHUNK_KINDS] small_object_freelists;
__gshared _large_object_t* large_objects;

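// Layout overview: walloc manages the linear memory past __heap_base as 64 KiB
// pages, each split into 256 chunks of 256 bytes. Chunk 0 of every page holds
// the page header: one chunk_kind byte per chunk. Requests of up to 256 bytes
// are rounded up to a size class measured in 8-byte granules and served from
// the per-class freelists in small_object_freelists, refilled one chunk at a
// time. Anything bigger becomes a large object: a run of whole chunks with an
// inline _large_object_t header, tracked on the large_objects freelist and
// split or merged as needed.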

pragma(inline, true)
size_t _max(size_t a, size_t b) { return a < b ? b : a; }

pragma(inline, true)
size_t _alignv(size_t val, size_t alignment) { return (val + alignment - 1) & ~(alignment - 1); }

pragma(inline, true)
extern(D)
void __assert_aligned(T, Y)(T x, Y y) {
    assert(cast(size_t)x == _alignv(cast(size_t)x, cast(size_t)y));
}

enum size_t CHUNK_SIZE = 256;
enum size_t CHUNK_SIZE_LOG_2 = 8;
enum size_t CHUNK_MASK = (CHUNK_SIZE - 1);
enum size_t PAGE_SIZE = 65536;
enum size_t PAGE_SIZE_LOG_2 = 16;
enum size_t PAGE_MASK = (PAGE_SIZE - 1);
enum size_t CHUNKS_PER_PAGE = 256;
enum size_t GRANULE_SIZE = 8;
enum size_t GRANULE_SIZE_LOG_2 = 3;
enum size_t LARGE_OBJECT_THRESHOLD = 256;
enum size_t LARGE_OBJECT_GRANULE_THRESHOLD = 32;
enum size_t FIRST_ALLOCATABLE_CHUNK = 1;
enum size_t PAGE_HEADER_SIZE = _page_header_t.sizeof;
enum size_t LARGE_OBJECT_HEADER_SIZE = _large_object_t.sizeof;

static assert(PAGE_SIZE == CHUNK_SIZE * CHUNKS_PER_PAGE);
static assert(CHUNK_SIZE == 1 << CHUNK_SIZE_LOG_2);
static assert(PAGE_SIZE == 1 << PAGE_SIZE_LOG_2);
static assert(GRANULE_SIZE == 1 << GRANULE_SIZE_LOG_2);
static assert(LARGE_OBJECT_THRESHOLD ==
    LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);

struct _chunk_t {
    void[CHUNK_SIZE] data;
}

enum chunk_kind : ubyte {
    GRANULES_1,
    GRANULES_2,
    GRANULES_3,
    GRANULES_4,
    GRANULES_5,
    GRANULES_6,
    GRANULES_8,
    GRANULES_10,
    GRANULES_16,
    GRANULES_32,

    SMALL_OBJECT_CHUNK_KINDS,
    FREE_LARGE_OBJECT = 254,
    LARGE_OBJECT = 255
}

__gshared const ubyte[] small_object_granule_sizes = [
    1, 2, 3, 4, 5, 6, 8, 10, 16, 32
];

pragma(inline, true)
chunk_kind granules_to_chunk_kind(size_t granules) {
    static foreach(gsize; small_object_granule_sizes) {
        if (granules <= gsize)
            return mixin(q{chunk_kind.GRANULES_}, cast(int)gsize);
    }
    return chunk_kind.LARGE_OBJECT;
}

pragma(inline, true)
ubyte chunk_kind_to_granules(chunk_kind kind) {
    static foreach(gsize; small_object_granule_sizes) {
        if (kind == mixin(q{chunk_kind.GRANULES_}, cast(int)gsize))
            return gsize;
    }
    return cast(ubyte)-1;
}

struct _page_header_t {
    ubyte[CHUNKS_PER_PAGE] chunk_kinds;
}

struct _page_t {
    union {
        _page_header_t header;
        _chunk_t[CHUNKS_PER_PAGE] chunks;
    }
}

pragma(inline, true)
_page_t* get_page(void *ptr) {
    return cast(_page_t*)cast(void*)((cast(size_t) ptr) & ~PAGE_MASK);
}

pragma(inline, true)
static size_t get_chunk_index(void *ptr) {
    return ((cast(size_t) ptr) & PAGE_MASK) / CHUNK_SIZE;
}

struct _freelist_t {
    _freelist_t *next;
}

struct _large_object_t {
    _large_object_t* next;
    size_t size;
}

pragma(inline, true)
void* get_large_object_payload(_large_object_t *obj) {
    return (cast(void*)obj) + LARGE_OBJECT_HEADER_SIZE;
}

pragma(inline, true)
_large_object_t* get_large_object(void *ptr) {
    // Step back over the header in byte units, not in _large_object_t units.
    return cast(_large_object_t*)(ptr - LARGE_OBJECT_HEADER_SIZE);
}

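// Grab fresh pages for the walloc heap. On first use this claims whatever
// memory already exists past __heap_base (rounded up to a page boundary);
// otherwise it grows the wasm memory by at least 50% of the current walloc
// heap, so repeated large allocations stay amortized (e.g. with a 1 MiB heap,
// a 64 KiB request still grows memory by 512 KiB). Returns the first new page
// and stores the page count in *allocated, or null if memory.grow fails.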
_page_t* allocate_pages(size_t payloadSize, size_t* allocated) {
    size_t needed = payloadSize + PAGE_HEADER_SIZE;
    size_t heap_size = llvm_wasm_memory_size(0) * PAGE_SIZE;
    size_t base = heap_size;
    size_t preallocated = 0, grow = 0;

    if (!walloc_heap_size) {
        // We are allocating the initial pages, if any. We skip the first 64 kB,
        // then take any additional space up to the memory size.
        size_t heap_base = _alignv(cast(size_t)&__heap_base, PAGE_SIZE);
        preallocated = heap_size - heap_base; // Preallocated pages.
        walloc_heap_size = preallocated;
        base -= preallocated;
    }

    if (preallocated < needed) {
        // Always grow the walloc heap at least by 50%.
        grow = _alignv(_max(walloc_heap_size / 2, needed - preallocated),
                PAGE_SIZE);

        assert(grow);
        if (llvm_wasm_memory_grow(0, cast(int)(grow >> PAGE_SIZE_LOG_2)) == -1) {
            return null;
        }

        walloc_heap_size += grow;
    }

    _page_t* ret = cast(_page_t*)base;
    size_t size = grow + preallocated;

    assert(size);
    assert(size == _alignv(size, PAGE_SIZE));
    *allocated = size / PAGE_SIZE;
    return ret;
}

void* allocate_chunk(_page_t* page, size_t idx, chunk_kind kind) {
    page.header.chunk_kinds[idx] = kind;
    return page.chunks[idx].data.ptr;
}

// It's possible for splitting to produce a large object of size 248 (256 minus
// the header size) -- i.e. spanning a single chunk. In that case, push the
// chunk back on the GRANULES_32 small object freelist.
void maybe_repurpose_single_chunk_large_objects_head() {
    if (large_objects.size < CHUNK_SIZE) {
        size_t idx = get_chunk_index(large_objects);
        void* ptr = allocate_chunk(get_page(large_objects), idx, chunk_kind.GRANULES_32);
        large_objects = large_objects.next;
        _freelist_t* head = cast(_freelist_t*)ptr;
        head.next = small_object_freelists[chunk_kind.GRANULES_32];
        small_object_freelists[chunk_kind.GRANULES_32] = head;
    }
}

// If there have been any large-object frees since the last large object
// allocation, go through the freelist and merge any adjacent objects.
__gshared int pending_large_object_compact = 0;
_large_object_t** maybe_merge_free_large_object(_large_object_t** prev) {
    _large_object_t* obj = *prev;

    while(true) {
        void* end = get_large_object_payload(obj) + obj.size;
        __assert_aligned(end, CHUNK_SIZE);

        size_t chunk = get_chunk_index(end);
        if (chunk < FIRST_ALLOCATABLE_CHUNK) {
            // Merging can't create a large object that newly spans the header chunk.
            // This check also catches the end-of-heap case.
            return prev;
        }
        _page_t* page = get_page(end);
        if (page.header.chunk_kinds[chunk] != chunk_kind.FREE_LARGE_OBJECT) {
            return prev;
        }
        _large_object_t* next = cast(_large_object_t*)end;

        _large_object_t** prev_prev = &large_objects;
        _large_object_t* walk = large_objects;
        while(true) {
            assert(walk);
            if (walk == next) {
                obj.size += LARGE_OBJECT_HEADER_SIZE + walk.size;
                *prev_prev = walk.next;
                if (prev == &walk.next) {
                    prev = prev_prev;
                }
                break;
            }
            prev_prev = &walk.next;
            walk = walk.next;
        }
    }
}

void maybe_compact_free_large_objects() {
    if (pending_large_object_compact) {
        pending_large_object_compact = 0;
        _large_object_t** prev = &large_objects;
        while (*prev) {
            prev = &(*maybe_merge_free_large_object(prev)).next;
        }
    }
}

// Allocate a large object with enough space for SIZE payload bytes. Returns a
// large object with a header, aligned on a chunk boundary, whose payload size
// may be larger than SIZE, and whose total size (header included) is
// chunk-aligned. Either a suitable allocation is found in the large object
// freelist, or we ask the OS for some more pages and treat those pages as a
// large object. If the allocation fits in that large object and there's more
// than an aligned chunk's worth of data free at the end, the large object is
// split.
//
// The return value's corresponding chunk in the page is marked as starting a
// large object.
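//
// (The free large objects form a singly linked list threaded through their
// headers; the scan below is a plain best-fit search over that list.)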
_large_object_t* allocate_large_object(size_t size) {
    maybe_compact_free_large_objects();

    _large_object_t* best = null;
    _large_object_t** best_prev = &large_objects;
    size_t best_size = -1;

    _large_object_t** prev = &large_objects;
    _large_object_t* walk = large_objects;
    while (walk) {
        if (walk.size >= size && walk.size < best_size) {
            best_size = walk.size;
            best = walk;
            best_prev = prev;

            // Not going to do any better than this; just return it.
            if (best_size + LARGE_OBJECT_HEADER_SIZE == _alignv(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE))
                break;
        }

        prev = &walk.next;
        walk = walk.next;
    }

    if (!best) {
        // The large object freelist doesn't have an object big enough for this
        // allocation. Allocate one or more pages from the OS, and treat that new
        // sequence of pages as a fresh large object. It will be split if
        // necessary.
        size_t size_with_header = size + _large_object_t.sizeof;
        size_t n_allocated = 0;
        _page_t* page = allocate_pages(size_with_header, &n_allocated);
        if (!page) {
            return null;
        }

        void* ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, chunk_kind.LARGE_OBJECT);
        best = cast(_large_object_t*)ptr;
        size_t page_header = ptr - cast(void*)page;

        best.next = large_objects;
        best.size = best_size = n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE;
        assert(best_size >= size_with_header);
    }

    allocate_chunk(get_page(best), get_chunk_index(best), chunk_kind.LARGE_OBJECT);

    _large_object_t* next = best.next;
    *best_prev = next;

    size_t tail_size = (best_size - size) & ~CHUNK_MASK;
    if (tail_size) {
        // The best-fitting object has 1 or more aligned chunks free after the
        // requested allocation; split the tail off into a fresh aligned object.
        _page_t* start_page = get_page(best);
        void* start = get_large_object_payload(best);
        void* end = start + best_size;

        if (start_page == get_page(end - tail_size - 1)) {

            // The allocation does not span a page boundary; yay.
            __assert_aligned(end, CHUNK_SIZE);
        } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) {

            // If the allocation itself is smaller than a page, split off the head,
            // then fall through to maybe split the tail.
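            // (best currently starts part-way through its first page, so give that
            // partial first page back to the freelist as its own object and restart
            // the allocation at the first allocatable chunk of the following page.)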
            assert(cast(size_t)end == _alignv(cast(size_t)end, PAGE_SIZE));

            size_t first_page_size = PAGE_SIZE - (cast(size_t)start & PAGE_MASK);
            _large_object_t* head = best;
            allocate_chunk(start_page, get_chunk_index(start), chunk_kind.FREE_LARGE_OBJECT);
            head.size = first_page_size;
            head.next = large_objects;
            large_objects = head;

            maybe_repurpose_single_chunk_large_objects_head();

            _page_t* next_page = start_page + 1;
            void* ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, chunk_kind.LARGE_OBJECT);
            best = cast(_large_object_t*)ptr;
            best.size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE;
            assert(best_size >= size);

            start = get_large_object_payload(best);
            tail_size = (best_size - size) & ~CHUNK_MASK;
        } else {

            // A large object that spans more than one page will consume all of its
            // tail pages. Therefore if the split traverses a page boundary, round up
            // to page size.
            __assert_aligned(end, PAGE_SIZE);
            size_t first_page_size = PAGE_SIZE - (cast(size_t)start & PAGE_MASK);
            size_t tail_pages_size = _alignv(size - first_page_size, PAGE_SIZE);
            size = first_page_size + tail_pages_size;
            tail_size = best_size - size;
        }
        best.size -= tail_size;

        size_t tail_idx = get_chunk_index(end - tail_size);
        while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) {

            // We would be splitting in a page header; don't do that.
            tail_size -= CHUNK_SIZE;
            tail_idx++;
        }

        if (tail_size) {
            _page_t *page = get_page(end - tail_size);
            void* tail_ptr = allocate_chunk(page, tail_idx, chunk_kind.FREE_LARGE_OBJECT);
            _large_object_t* tail = cast(_large_object_t*) tail_ptr;
            tail.next = large_objects;
            tail.size = tail_size - LARGE_OBJECT_HEADER_SIZE;

            debug {
                size_t payloadsz = cast(size_t)get_large_object_payload(tail) + tail.size;
                assert(payloadsz == _alignv(payloadsz, CHUNK_SIZE));
            }

            large_objects = tail;
            maybe_repurpose_single_chunk_large_objects_head();
        }
    }

    debug {
        size_t payloadsz = cast(size_t)get_large_object_payload(best) + best.size;
        assert(payloadsz == _alignv(payloadsz, CHUNK_SIZE));
    }
    return best;
}

_freelist_t* obtain_small_objects(chunk_kind kind) {
    _freelist_t** whole_chunk_freelist = &small_object_freelists[chunk_kind.GRANULES_32];
    void *chunk;
    if (*whole_chunk_freelist) {
        chunk = *whole_chunk_freelist;
        *whole_chunk_freelist = (*whole_chunk_freelist).next;
    } else {
        chunk = allocate_large_object(0);
        if (!chunk) {
            return null;
        }
    }

    void* ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind);
    void* end = ptr + CHUNK_SIZE;
    _freelist_t* next = null;
    size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE;
    for (size_t i = size; i <= CHUNK_SIZE; i += size) {
        _freelist_t* head = cast(_freelist_t*)(end - i);
        head.next = next;
        next = head;
    }
    return next;
}

pragma(inline, true)
size_t size_to_granules(size_t size) {
    return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2;
}

pragma(inline, true)
_freelist_t** get_small_object_freelist(chunk_kind kind) {
    assert(kind < chunk_kind.SMALL_OBJECT_CHUNK_KINDS);
    return &small_object_freelists[kind];
}
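// A compile-time sanity sketch, not part of upstream walloc: spot-check the
// size-class mapping used by malloc above, relying only on the constants and
// helpers defined in this module.
static assert(size_to_granules(1) == 1);                              // 1 byte  -> 1 granule
static assert(size_to_granules(9) == 2);                              // 9 bytes -> 2 granules
static assert(granules_to_chunk_kind(7) == chunk_kind.GRANULES_8);    // rounds up to the next class
static assert(granules_to_chunk_kind(32) == chunk_kind.GRANULES_32);  // 256 bytes is still "small"
static assert(granules_to_chunk_kind(33) == chunk_kind.LARGE_OBJECT); // anything bigger is large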

void* allocate_small(chunk_kind kind) {
    _freelist_t** loc = get_small_object_freelist(kind);
    if (!*loc) {
        _freelist_t* freelist = obtain_small_objects(kind);
        if (!freelist)
            return null;

        *loc = freelist;
    }

    _freelist_t* ret = *loc;
    *loc = ret.next;
    return cast(void*)ret;
}

void* allocate_large(size_t size) {
    _large_object_t* obj = allocate_large_object(size);
    return obj ? get_large_object_payload(obj) : null;
}
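
// More compile-time checks, not part of upstream walloc: the page header (one
// kind byte per chunk) occupies exactly chunk 0, which is why allocatable
// chunks start at FIRST_ALLOCATABLE_CHUNK, and a large-object header always
// fits within a single chunk.
static assert(PAGE_HEADER_SIZE == CHUNKS_PER_PAGE);
static assert(PAGE_HEADER_SIZE == CHUNK_SIZE * FIRST_ALLOCATABLE_CHUNK);
static assert(LARGE_OBJECT_HEADER_SIZE <= CHUNK_SIZE);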