/**
    A small malloc implementation for use in WebAssembly targets

    Copyright (c) 2023-2025, Kitsunebi Games
    Copyright (c) 2023-2025, Inochi2D Project
    Copyright (c) 2020, Igalia, S.L.

    Distributed under an MIT-style License.
    (See accompanying LICENSE file or copy at
    https://github.com/wingo/walloc/blob/master/LICENSE.md)
*/
module walloc;
import ldc.intrinsics :
    llvm_wasm_memory_grow,
    llvm_wasm_memory_size,
    llvm_memmove;

extern(C) @nogc nothrow:

export
void* malloc(size_t size) @nogc nothrow @system {
    if (size == 0)
        return null;

    size_t granules = size_to_granules(size);
    chunk_kind kind = granules_to_chunk_kind(granules);
    return (kind == chunk_kind.LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind);
}

export
void free(void* ptr) @nogc nothrow @system {
    if (!ptr) return;

    _page_t* page = get_page(ptr);
    size_t chunk = get_chunk_index(ptr);
    ubyte kind = page.header.chunk_kinds[chunk];
    if (kind == chunk_kind.LARGE_OBJECT) {
        _large_object_t* obj = get_large_object(ptr);
        obj.next = large_objects;
        large_objects = obj;
        allocate_chunk(page, chunk, chunk_kind.FREE_LARGE_OBJECT);
        pending_large_object_compact = 1;
    } else {
        // For small objects the chunk kind doubles as the freelist index.
        _freelist_t** loc = get_small_object_freelist(cast(chunk_kind)kind);
        _freelist_t* obj = cast(_freelist_t*)ptr;
        obj.next = *loc;
        *loc = obj;
    }
}

export
void* realloc(void* ptr, size_t newSize) @nogc nothrow @system {
    if (!ptr)
        return malloc(newSize);

    size_t oldSize = get_alloc_size(ptr);
    if (newSize <= oldSize)
        return ptr;

    // The new size is larger than the current allocation; allocate a new
    // block, copy the old contents over, then free the old block.
    void* n_mem = malloc(newSize);
    if (!n_mem)
        return null;

    llvm_memmove(n_mem, ptr, oldSize, true);
    free(ptr);
    return n_mem;
}
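
// A minimal usage sketch of the public API above (added for illustration, not
// part of the original walloc sources). It assumes a wasm32 target, where the
// memory intrinsics used by this module exist, and a unittest runner on that
// target.
version (WebAssembly)
@system unittest {
    // Small allocation: served from a per-size-class freelist.
    void* p = malloc(24);
    assert(p !is null);

    // Growing reallocation: allocates a new block and copies the old contents.
    p = realloc(p, 512);
    assert(p !is null);

    // Large allocation: handled by the large-object path.
    void* q = malloc(4096);
    assert(q !is null);

    free(q);
    free(p);
}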

private:

size_t get_alloc_size(void* ptr) {
    _page_t* page = get_page(ptr);
    size_t chunk = get_chunk_index(ptr);
    chunk_kind kind = cast(chunk_kind)page.header.chunk_kinds[chunk];

    if (kind == chunk_kind.LARGE_OBJECT) {
        _large_object_t* obj = get_large_object(ptr);
        return obj.size;
    }

    // For small objects the chunk kind encodes the granule count.
    if (kind < chunk_kind.SMALL_OBJECT_CHUNK_KINDS)
        return chunk_kind_to_granules(kind) * GRANULE_SIZE;

    return 0;
}

extern __gshared void* __heap_base;
__gshared size_t walloc_heap_size;
__gshared _freelist_t*[chunk_kind.SMALL_OBJECT_CHUNK_KINDS] small_object_freelists;
__gshared _large_object_t* large_objects;


pragma(inline, true)
size_t _max(size_t a, size_t b) { return a < b ? b : a; }

pragma(inline, true)
size_t _alignv(size_t val, size_t alignment) { return (val + alignment - 1) & ~(alignment - 1); }
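
// Compile-time examples for the helpers above (added for clarity; not part of
// the original sources): _alignv rounds up to the next multiple of a
// power-of-two alignment.
static assert(_max(3, 7) == 7);
static assert(_alignv(0, 8) == 0);
static assert(_alignv(5, 8) == 8);
static assert(_alignv(8, 8) == 8);
static assert(_alignv(250, 256) == 256);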

pragma(inline, true)
extern(D)
void __assert_aligned(T, Y)(T x, Y y) {
    assert(cast(size_t)x == _alignv(cast(size_t)x, cast(size_t)y));
}

enum size_t CHUNK_SIZE = 256;
enum size_t CHUNK_SIZE_LOG_2 = 8;
enum size_t CHUNK_MASK = (CHUNK_SIZE - 1);
enum size_t PAGE_SIZE = 65536;
enum size_t PAGE_SIZE_LOG_2 = 16;
enum size_t PAGE_MASK = (PAGE_SIZE - 1);
enum size_t CHUNKS_PER_PAGE = 256;
enum size_t GRANULE_SIZE = 8;
enum size_t GRANULE_SIZE_LOG_2 = 3;
enum size_t LARGE_OBJECT_THRESHOLD = 256;
enum size_t LARGE_OBJECT_GRANULE_THRESHOLD = 32;
enum size_t FIRST_ALLOCATABLE_CHUNK = 1;
enum size_t PAGE_HEADER_SIZE = _page_header_t.sizeof;
enum size_t LARGE_OBJECT_HEADER_SIZE = _large_object_t.sizeof;

static assert(PAGE_SIZE == CHUNK_SIZE * CHUNKS_PER_PAGE);
static assert(CHUNK_SIZE == 1 << CHUNK_SIZE_LOG_2);
static assert(PAGE_SIZE == 1 << PAGE_SIZE_LOG_2);
static assert(GRANULE_SIZE == 1 << GRANULE_SIZE_LOG_2);
static assert(LARGE_OBJECT_THRESHOLD ==
    LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);

struct _chunk_t {
    void[CHUNK_SIZE] data;
}

enum chunk_kind : ubyte {
    GRANULES_1,
    GRANULES_2,
    GRANULES_3,
    GRANULES_4,
    GRANULES_5,
    GRANULES_6,
    GRANULES_8,
    GRANULES_10,
    GRANULES_16,
    GRANULES_32,

    SMALL_OBJECT_CHUNK_KINDS,
    FREE_LARGE_OBJECT = 254,
    LARGE_OBJECT = 255
}

__gshared const ubyte[] small_object_granule_sizes = [
    1, 2, 3, 4, 5, 6, 8, 10, 16, 32
];

pragma(inline, true)
chunk_kind granules_to_chunk_kind(size_t granules) {
    static foreach(gsize; small_object_granule_sizes) {
        if (granules <= gsize)
            return mixin(q{chunk_kind.GRANULES_}, cast(int)gsize);
    }
    return chunk_kind.LARGE_OBJECT;
}

pragma(inline, true)
ubyte chunk_kind_to_granules(chunk_kind kind) {
    static foreach(gsize; small_object_granule_sizes) {
        if (kind == mixin(q{chunk_kind.GRANULES_}, cast(int)gsize))
            return gsize;
    }
    return cast(ubyte)-1;
}
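
// Illustrative compile-time checks of the size-class mapping above (added for
// clarity; not part of the original sources): a 7-granule request rounds up to
// the 8-granule class, and the two mappings round-trip for exact class sizes.
static assert(granules_to_chunk_kind(1) == chunk_kind.GRANULES_1);
static assert(granules_to_chunk_kind(7) == chunk_kind.GRANULES_8);
static assert(chunk_kind_to_granules(granules_to_chunk_kind(10)) == 10);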

struct _page_header_t {
    ubyte[CHUNKS_PER_PAGE] chunk_kinds;
}

struct _page_t {
    union {
        _page_header_t header;
        _chunk_t[CHUNKS_PER_PAGE] chunks;
    }
}

pragma(inline, true)
_page_t* get_page(void* ptr) {
    return cast(_page_t*)cast(void*)((cast(size_t) ptr) & ~PAGE_MASK);
}

pragma(inline, true)
size_t get_chunk_index(void* ptr) {
    return ((cast(size_t) ptr) & PAGE_MASK) / CHUNK_SIZE;
}
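
// Worked example of the address arithmetic above (added for clarity; the
// address 0x2_4321 is made up): get_page masks off the low 16 bits of the
// address, and get_chunk_index divides the in-page offset by the 256-byte
// chunk size.
static assert((0x2_4321 & ~PAGE_MASK) == 0x2_0000);
static assert(((0x2_4321 & PAGE_MASK) / CHUNK_SIZE) == 0x43);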

struct _freelist_t {
    _freelist_t* next;
}

struct _large_object_t {
    _large_object_t* next;
    size_t size;
}

pragma(inline, true)
void* get_large_object_payload(_large_object_t* obj) {
    return (cast(void*)obj) + LARGE_OBJECT_HEADER_SIZE;
}

pragma(inline, true)
_large_object_t* get_large_object(void* ptr) {
    // Step back over the header in bytes, not in _large_object_t strides.
    return cast(_large_object_t*)(ptr - LARGE_OBJECT_HEADER_SIZE);
}

_page_t* allocate_pages(size_t payloadSize, size_t* allocated) {
    size_t needed = payloadSize + PAGE_HEADER_SIZE;
    size_t heap_size = llvm_wasm_memory_size(0) * PAGE_SIZE;
    size_t base = heap_size;
    size_t preallocated = 0, grow = 0;

    if (!walloc_heap_size) {
        // We are allocating the initial pages, if any.  We skip the first 64 kB,
        // then take any additional space up to the memory size.
        size_t heap_base = _alignv(cast(size_t)&__heap_base, PAGE_SIZE);
        preallocated = heap_size - heap_base; // Preallocated pages.
        walloc_heap_size = preallocated;
        base -= preallocated;
    }

    if (preallocated < needed) {
        // Always grow the walloc heap at least by 50%.
        grow = _alignv(_max(walloc_heap_size / 2, needed - preallocated),
                        PAGE_SIZE);

        assert(grow);
        if (llvm_wasm_memory_grow(0, cast(int)(grow >> PAGE_SIZE_LOG_2)) == -1) {
            return null;
        }

        walloc_heap_size += grow;
    }

    _page_t* ret = cast(_page_t*)base;
    size_t size = grow + preallocated;

    assert(size);
    assert(size == _alignv(size, PAGE_SIZE));
    *allocated = size / PAGE_SIZE;
    return ret;
}
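
// Illustration of the growth policy above (added for clarity; the figures are
// made up): the heap grows by at least half of its current size, rounded up to
// whole 64 KiB pages.
static assert(_alignv(_max(10 * PAGE_SIZE / 2, 1000), PAGE_SIZE) == 5 * PAGE_SIZE);
static assert(_alignv(_max(0, 1000), PAGE_SIZE) == PAGE_SIZE);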

void* allocate_chunk(_page_t* page, size_t idx, chunk_kind kind) {
    page.header.chunk_kinds[idx] = kind;
    return page.chunks[idx].data.ptr;
}

// It's possible for splitting to produce a large object of size 248 (256 minus
// the header size) -- i.e. spanning a single chunk.  In that case, push the
// chunk back on the GRANULES_32 small object freelist.
void maybe_repurpose_single_chunk_large_objects_head() {
    if (large_objects.size < CHUNK_SIZE) {
        size_t idx = get_chunk_index(large_objects);
        void* ptr = allocate_chunk(get_page(large_objects), idx, chunk_kind.GRANULES_32);
        large_objects = large_objects.next;
        _freelist_t* head = cast(_freelist_t*)ptr;
        head.next = small_object_freelists[chunk_kind.GRANULES_32];
        small_object_freelists[chunk_kind.GRANULES_32] = head;
    }
}

// If there have been any large-object frees since the last large object
// allocation, go through the freelist and merge any adjacent objects.
__gshared int pending_large_object_compact = 0;
_large_object_t** maybe_merge_free_large_object(_large_object_t** prev) {
    _large_object_t* obj = *prev;

    while(true) {
        void* end = get_large_object_payload(obj) + obj.size;
        __assert_aligned(end, CHUNK_SIZE);

        size_t chunk = get_chunk_index(end);
        if (chunk < FIRST_ALLOCATABLE_CHUNK) {
            // Merging can't create a large object that newly spans the header chunk.
            // This check also catches the end-of-heap case.
            return prev;
        }
        _page_t* page = get_page(end);
        if (page.header.chunk_kinds[chunk] != chunk_kind.FREE_LARGE_OBJECT) {
            return prev;
        }
        _large_object_t* next = cast(_large_object_t*)end;

        _large_object_t** prev_prev = &large_objects;
        _large_object_t* walk = large_objects;
        while(true) {
            assert(walk);
            if (walk == next) {
                obj.size += LARGE_OBJECT_HEADER_SIZE + walk.size;
                *prev_prev = walk.next;
                if (prev == &walk.next) {
                    prev = prev_prev;
                }
                break;
            }
            prev_prev = &walk.next;
            walk = walk.next;
        }
    }
}

void maybe_compact_free_large_objects() {
    if (pending_large_object_compact) {
        pending_large_object_compact = 0;
        _large_object_t** prev = &large_objects;
        while (*prev) {
            prev = &(*maybe_merge_free_large_object(prev)).next;
        }
    }
}

// Allocate a large object with enough space for SIZE payload bytes.  Returns a
// large object with a header, aligned on a chunk boundary, whose payload size
// may be larger than SIZE, and whose total size (header included) is
// chunk-aligned.  Either a suitable allocation is found in the large object
// freelist, or we ask the OS for some more pages and treat those pages as a
// large object.  If the allocation fits in that large object and there's more
// than an aligned chunk's worth of data free at the end, the large object is
// split.
//
// The return value's corresponding chunk in the page is marked as starting a
// large object.
_large_object_t* allocate_large_object(size_t size) {
    maybe_compact_free_large_objects();

    _large_object_t* best = null;
    _large_object_t** best_prev = &large_objects;
    size_t best_size = -1;

    _large_object_t** prev = &large_objects;
    _large_object_t* walk = large_objects;
    while (walk) {
        if (walk.size >= size && walk.size < best_size) {
            best_size = walk.size;
            best = walk;
            best_prev = prev;

            // Not going to do any better than this; just return it.
            if (best_size + LARGE_OBJECT_HEADER_SIZE == _alignv(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE))
                break;
        }

        prev = &walk.next;
        walk = walk.next;
    }

    if (!best) {
        // The large object freelist doesn't have an object big enough for this
        // allocation.  Allocate one or more pages from the OS, and treat that new
        // sequence of pages as a fresh large object.  It will be split if
        // necessary.
        size_t size_with_header = size + _large_object_t.sizeof;
        size_t n_allocated = 0;
        _page_t* page = allocate_pages(size_with_header, &n_allocated);
        if (!page) {
            return null;
        }

        void* ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, chunk_kind.LARGE_OBJECT);
        best = cast(_large_object_t*)ptr;
        size_t page_header = ptr - cast(void*)page;

        best.next = large_objects;
        best.size = best_size = n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE;
        assert(best_size >= size_with_header);
    }

    allocate_chunk(get_page(best), get_chunk_index(best), chunk_kind.LARGE_OBJECT);

    _large_object_t* next = best.next;
    *best_prev = next;

    size_t tail_size = (best_size - size) & ~CHUNK_MASK;
    if (tail_size) {
        // The best-fitting object has 1 or more aligned chunks free after the
        // requested allocation; split the tail off into a fresh aligned object.
        _page_t* start_page = get_page(best);
        void* start = get_large_object_payload(best);
        void* end = start + best_size;

        if (start_page == get_page(end - tail_size - 1)) {
            // The allocation does not span a page boundary; yay.
            __assert_aligned(end, CHUNK_SIZE);
        } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) {
            // If the allocation is itself smaller than a page, split off the
            // head, then fall through to maybe split the tail.
            assert(cast(size_t)end == _alignv(cast(size_t)end, PAGE_SIZE));

            size_t first_page_size = PAGE_SIZE - (cast(size_t)start & PAGE_MASK);
            _large_object_t* head = best;
            allocate_chunk(start_page, get_chunk_index(start), chunk_kind.FREE_LARGE_OBJECT);
            head.size = first_page_size;
            head.next = large_objects;
            large_objects = head;

            maybe_repurpose_single_chunk_large_objects_head();

            _page_t* next_page = start_page + 1;
            void* ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, chunk_kind.LARGE_OBJECT);
            best = cast(_large_object_t*)ptr;
            best.size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE;
            assert(best_size >= size);

            start = get_large_object_payload(best);
            tail_size = (best_size - size) & ~CHUNK_MASK;
        } else {
            // A large object that spans more than one page will consume all of its
            // tail pages.  Therefore if the split traverses a page boundary, round up
            // to page size.
            __assert_aligned(end, PAGE_SIZE);
            size_t first_page_size = PAGE_SIZE - (cast(size_t)start & PAGE_MASK);
            size_t tail_pages_size = _alignv(size - first_page_size, PAGE_SIZE);
            size = first_page_size + tail_pages_size;
            tail_size = best_size - size;
        }
        best.size -= tail_size;

        size_t tail_idx = get_chunk_index(end - tail_size);
        while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) {
            // We would be splitting in a page header; don't do that.
            tail_size -= CHUNK_SIZE;
            tail_idx++;
        }

        if (tail_size) {
            _page_t* page = get_page(end - tail_size);
            void* tail_ptr = allocate_chunk(page, tail_idx, chunk_kind.FREE_LARGE_OBJECT);
            _large_object_t* tail = cast(_large_object_t*) tail_ptr;
            tail.next = large_objects;
            tail.size = tail_size - LARGE_OBJECT_HEADER_SIZE;

            debug {
                size_t payloadsz = cast(size_t)get_large_object_payload(tail) + tail.size;
                assert(payloadsz == _alignv(payloadsz, CHUNK_SIZE));
            }

            large_objects = tail;
            maybe_repurpose_single_chunk_large_objects_head();
        }
    }

    debug {
        size_t payloadsz = cast(size_t)get_large_object_payload(best) + best.size;
        assert(payloadsz == _alignv(payloadsz, CHUNK_SIZE));
    }
    return best;
}

_freelist_t* obtain_small_objects(chunk_kind kind) {
    _freelist_t** whole_chunk_freelist = &small_object_freelists[chunk_kind.GRANULES_32];
    void* chunk;
    if (*whole_chunk_freelist) {
        chunk = *whole_chunk_freelist;
        *whole_chunk_freelist = (*whole_chunk_freelist).next;
    } else {
        chunk = allocate_large_object(0);
        if (!chunk) {
            return null;
        }
    }

    void* ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind);
    void* end = ptr + CHUNK_SIZE;
    _freelist_t* next = null;
    size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE;
    for (size_t i = size; i <= CHUNK_SIZE; i += size) {
        _freelist_t* head = cast(_freelist_t*)(end - i);
        head.next = next;
        next = head;
    }
    return next;
}

pragma(inline, true)
size_t size_to_granules(size_t size) {
    return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2;
}
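
// Examples of the granule rounding above (added for clarity; not part of the
// original sources): sizes are rounded up to whole 8-byte granules.
static assert(size_to_granules(1) == 1);
static assert(size_to_granules(8) == 1);
static assert(size_to_granules(9) == 2);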

pragma(inline, true)
_freelist_t** get_small_object_freelist(chunk_kind kind) {
    assert(kind < chunk_kind.SMALL_OBJECT_CHUNK_KINDS);
    return &small_object_freelists[kind];
}

void* allocate_small(chunk_kind kind) {
    _freelist_t** loc = get_small_object_freelist(kind);
    if (!*loc) {
        _freelist_t* freelist = obtain_small_objects(kind);
        if (!freelist)
            return null;

        *loc = freelist;
    }

    _freelist_t* ret = *loc;
    *loc = ret.next;
    return cast(void*)ret;
}

void* allocate_large(size_t size) {
    _large_object_t* obj = allocate_large_object(size);
    return obj ? get_large_object_payload(obj) : null;
}
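
// End-to-end compile-time check of the small/large boundary (added for
// clarity): a request of exactly LARGE_OBJECT_THRESHOLD bytes still maps to
// the largest small class, while one byte more becomes a large object.
static assert(granules_to_chunk_kind(size_to_granules(LARGE_OBJECT_THRESHOLD)) == chunk_kind.GRANULES_32);
static assert(granules_to_chunk_kind(size_to_granules(LARGE_OBJECT_THRESHOLD + 1)) == chunk_kind.LARGE_OBJECT);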