Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make global/static variables thread_local; use mmap() for managing memory #1161

Open
wants to merge 16 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/runtime/arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ using memory_block_header = struct {
// Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
// 127.
#define REGISTER_ARENA(name, id) \
static struct arena name = {.allocation_semispace_id = (id)}
static thread_local struct arena name = {.allocation_semispace_id = (id)}

// Maps a pointer to the start address of the BLOCK_SIZE-aligned block that
// contains the byte immediately before it: 1 is subtracted from the address
// and the low bits are then cleared with ~(BLOCK_SIZE - 1). Because of the
// -1, a pointer that sits exactly on a block boundary resolves to the
// preceding block rather than the block that starts at that address.
// NOTE(review): the mask only rounds down correctly if BLOCK_SIZE is a power
// of two - confirm against its definition.
#define MEM_BLOCK_START(ptr) \
((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))

extern bool time_for_collection;
extern thread_local bool time_for_collection;

size_t get_gc_threshold();

Expand Down
4 changes: 2 additions & 2 deletions include/runtime/collect.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ using set_node = set::iterator::node_t;
using set_impl = set::iterator::tree_t;

extern "C" {
extern size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
extern bool collect_old;
extern thread_local size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
extern thread_local bool collect_old;
size_t get_size(uint64_t, uint16_t);
void migrate_static_roots(void);
void migrate(block **block_ptr);
Expand Down
8 changes: 7 additions & 1 deletion include/runtime/header.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,14 @@ size_t hash_k(block *);
void k_hash(block *, void *);
bool hash_enter(void);
void hash_exit(void);

#ifdef __MACH__
//
// thread_local disabled for Apple
//
extern bool gc_enabled;
#else
extern thread_local bool gc_enabled;
#endif
}

class k_elem {
Expand Down
33 changes: 30 additions & 3 deletions lib/codegen/CreateTerm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,18 +782,45 @@ llvm::Value *create_term::disable_gc() {
llvm::Constant *global
= module_->getOrInsertGlobal("gc_enabled", llvm::Type::getInt1Ty(ctx_));
auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
#ifdef __MACH__
//
// thread_local disabled for Apple
//
/*
global_var->setThreadLocal(true);
llvm::IRBuilder b(current_block_);
auto *global_var_address = b.CreateThreadLocalAddress(global_var);
*/
#else
global_var->setThreadLocal(true);
auto *global_var_address = global_var;
#endif
auto *old_val = new llvm::LoadInst(
llvm::Type::getInt1Ty(ctx_), global_var, "was_enabled", current_block_);
llvm::Type::getInt1Ty(ctx_), global_var_address, "was_enabled",
current_block_);
new llvm::StoreInst(
llvm::ConstantInt::getFalse(ctx_), global_var, current_block_);
llvm::ConstantInt::getFalse(ctx_), global_var_address, current_block_);
return old_val;
}

// Emits IR at the end of current_block_ that writes `was_enabled` back into
// the runtime's `gc_enabled` flag.
//
// was_enabled: the i1 value to store into the flag (disable_gc() returns the
//              value it loaded from the same global as "was_enabled").
void create_term::enable_gc(llvm::Value *was_enabled) {
  llvm::Constant *global
      = module_->getOrInsertGlobal("gc_enabled", llvm::Type::getInt1Ty(ctx_));
  auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
#ifndef __MACH__
  //
  // The runtime declares gc_enabled as thread_local everywhere except on
  // Apple platforms, so the IR global is only marked thread-local here.
  //
  global_var->setThreadLocal(true);
#endif
  // Declared unconditionally so that the store below compiles on every
  // platform, including the one where the thread-local setup is disabled.
  auto *global_var_address = global_var;
  new llvm::StoreInst(was_enabled, global_var_address, current_block_);
}

// We use tailcc calling convention for apply_rule_* and eval_* functions to
Expand Down
13 changes: 11 additions & 2 deletions lib/codegen/Decision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "kllvm/codegen/ProofEvent.h"
#include "kllvm/codegen/Util.h"

#include "llvm/IR/IRBuilder.h"
#include <llvm/ADT/APInt.h>
#include <llvm/ADT/SmallString.h>
#include <llvm/ADT/StringMap.h>
Expand Down Expand Up @@ -1012,9 +1013,17 @@ std::pair<std::vector<llvm::Value *>, llvm::BasicBlock *> step_function_header(

auto *collection = module->getOrInsertGlobal(
"time_for_collection", llvm::Type::getInt1Ty(module->getContext()));
llvm::cast<llvm::GlobalVariable>(collection)->setThreadLocal(true);
#ifdef __MACH__
llvm::IRBuilder b(check_collect);
auto *collection_address = b.CreateThreadLocalAddress(collection);
#else
auto *collection_address = collection;
#endif

auto *is_collection = new llvm::LoadInst(
llvm::Type::getInt1Ty(module->getContext()), collection, "is_collection",
check_collect);
llvm::Type::getInt1Ty(module->getContext()), collection_address,
"is_collection", check_collect);
set_debug_loc(is_collection);
auto *collect = llvm::BasicBlock::Create(
module->getContext(), "isCollect", block->getParent());
Expand Down
95 changes: 58 additions & 37 deletions runtime/alloc/arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <sys/mman.h>

#include "runtime/alloc.h"
#include "runtime/arena.h"
Expand Down Expand Up @@ -47,36 +49,61 @@ get_arena_semispace_id_of_object(void *ptr) {
return mem_block_header(ptr)->semispace;
}

static void *first_superblock_ptr = nullptr;
static void *superblock_ptr = nullptr;
static char **next_superblock_ptr = nullptr;
static unsigned blocks_left = 0;
//
// Reserve enough address space for 1024 * 1024 (roughly 1 million) blocks of BLOCK_SIZE bytes each, i.e. 1TB when BLOCK_SIZE is 1MB. Consider increasing this on a server with more than 1TB of memory.
//
size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
static thread_local void *hyperblock_ptr = nullptr; // only needed for munmap()

//
// Return pointer to a BLOCK_SIZE chunk of memory with BLOCK_SIZE alignment.
//
// The first call on a thread reserves HYPERBLOCK_SIZE bytes of address space
// with mmap() (MAP_NORESERVE, so nothing is committed up front) and returns
// the first BLOCK_SIZE-aligned address inside it. Every later call on that
// thread returns the address BLOCK_SIZE bytes after the previous one.
//
// Aborts the process if the reservation cannot be made or has been used up.
//
static void *megabyte_malloc() {
  // char* rather than void* to permit pointer arithmetic
  static thread_local char *currentblock_ptr = nullptr;
  // One past the last byte of this thread's reservation.
  static thread_local char *reservation_end = nullptr;

  if (currentblock_ptr) {
    //
    // We expect a page fault due to not being able to map physical memory to
    // this block, or the process to be killed by the OOM killer, long before
    // we run off the end of our address space. Check anyway: stepping past
    // the reservation would hand out addresses that belong to other mappings.
    //
    size_t const remaining
        = static_cast<size_t>(reservation_end - currentblock_ptr);
    if (remaining < 2 * static_cast<size_t>(BLOCK_SIZE)) {
      fprintf(stderr, "megabyte_malloc: reserved address space exhausted\n");
      abort();
    }
    currentblock_ptr += BLOCK_SIZE;
    return currentblock_ptr;
  }

  //
  // First call - need to reserve the address space.
  //
  size_t request = HYPERBLOCK_SIZE;
  void *addr = mmap(
      nullptr, // let OS choose the address
      request, // Linux and MacOS both allow up to 64TB
      PROT_READ | PROT_WRITE, // read, write but not execute
      MAP_ANONYMOUS | MAP_PRIVATE
          | MAP_NORESERVE, // allocate address space only
      -1, // no file backing
      0); // no offset
  if (addr == MAP_FAILED) {
    perror("mmap()");
    abort();
  }
  // Record the unaligned base and the end before std::align() modifies
  // `addr` and `request` in place.
  hyperblock_ptr = addr;
  reservation_end = static_cast<char *>(addr) + HYPERBLOCK_SIZE;

  //
  // We ask for one block worth of address space less than we allocated so
  // alignment will always succeed. We don't worry about unused address space
  // either side of our aligned address space because there will be no memory
  // mapped to it.
  //
  void *aligned
      = std::align(BLOCK_SIZE, HYPERBLOCK_SIZE - BLOCK_SIZE, addr, request);
  if (aligned == nullptr) {
    fprintf(stderr, "megabyte_malloc: could not align reserved address space\n");
    abort();
  }
  currentblock_ptr = static_cast<char *>(aligned);
  return currentblock_ptr;
}

bool time_for_collection;
//
// Releases the address range reserved for the calling thread's blocks, which
// frees all memory that was demand paged into that range.
//
// Safe to call when nothing has been reserved on this thread, and safe to
// call more than once: the reservation is released at most one time.
//
// NOTE(review): megabyte_malloc() keeps its own cursor into the reserved
// range and that cursor is not reset here, so allocating on this thread after
// this call would return addresses that are no longer mapped - confirm that
// callers never allocate after freeing.
//
void free_all_memory() {
  if (hyperblock_ptr == nullptr) {
    // No reservation recorded. Calling munmap() with a null address would
    // target the range starting at address 0 rather than our reservation.
    return;
  }
  if (munmap(hyperblock_ptr, HYPERBLOCK_SIZE) != 0) {
    perror("munmap()");
  }
  // Forget the range so that a repeated call cannot unmap it a second time.
  hyperblock_ptr = nullptr;
}

thread_local bool time_for_collection;

static void fresh_block(struct arena *arena) {
char *next_block = nullptr;
Expand Down Expand Up @@ -122,7 +149,14 @@ static void fresh_block(struct arena *arena) {
BLOCK_SIZE - sizeof(memory_block_header));
}

#ifdef __MACH__
//
// thread_local disabled for Apple
//
bool gc_enabled = true;
#else
thread_local bool gc_enabled = true;
#endif

__attribute__((noinline)) void *
do_alloc_slow(size_t requested, struct arena *arena) {
Expand Down Expand Up @@ -229,16 +263,3 @@ size_t arena_size(const struct arena *arena) {
: arena->num_collection_blocks)
* (BLOCK_SIZE - sizeof(memory_block_header));
}

void free_all_memory() {
auto *superblock = (memory_block_header *)first_superblock_ptr;
while (superblock) {
auto *next_superblock = (memory_block_header *)superblock->next_superblock;
free(superblock);
superblock = next_superblock;
}
first_superblock_ptr = nullptr;
superblock_ptr = nullptr;
next_superblock_ptr = nullptr;
blocks_left = 0;
}
2 changes: 1 addition & 1 deletion runtime/alloc/register_gc_roots_enum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "runtime/collect.h"
#include "runtime/header.h"

std::vector<BlockEnumerator> block_enumerators;
thread_local std::vector<BlockEnumerator> block_enumerators;

void register_gc_roots_enumerator(BlockEnumerator f) {
block_enumerators.push_back(f);
Expand Down
4 changes: 2 additions & 2 deletions runtime/arithmetic/int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,8 @@ void int_hash(mpz_t i, void *hasher) {
}
}

gmp_randstate_t kllvm_rand_state;
bool kllvm_rand_state_initialized = false;
thread_local gmp_randstate_t kllvm_rand_state;
thread_local bool kllvm_rand_state_initialized = false;

SortK hook_INT_srand(SortInt seed) {
if (!kllvm_rand_state_initialized) {
Expand Down
10 changes: 5 additions & 5 deletions runtime/collect/collect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ char **old_alloc_ptr(void);
char *youngspace_ptr(void);
char *oldspace_ptr(void);

static bool is_gc = false;
bool collect_old = false;
static thread_local bool is_gc = false;
bool thread_local collect_old = false;
#ifndef GC_DBG
static uint8_t num_collection_only_young = 0;
static thread_local uint8_t num_collection_only_young = 0;
#else
static char *last_alloc_ptr;
static thread_local char *last_alloc_ptr;
#endif

size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
size_t thread_local numBytesLiveAtCollection[1 << AGE_WIDTH];

bool during_gc() {
return is_gc;
Expand Down
6 changes: 3 additions & 3 deletions runtime/collect/migrate_static_roots.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

#include "runtime/collect.h"

extern std::vector<BlockEnumerator> block_enumerators;
extern thread_local std::vector<BlockEnumerator> block_enumerators;

extern gmp_randstate_t kllvm_rand_state;
extern bool kllvm_rand_state_initialized;
extern thread_local gmp_randstate_t kllvm_rand_state;
extern thread_local bool kllvm_rand_state_initialized;

extern "C" {

Expand Down
4 changes: 2 additions & 2 deletions runtime/lto/alloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ static inline void *kore_alloc_collection(kllvm::sort_category cat) {
void *mem
= kore_alloc(sizeof(blockheader) + sizeof(collection) + sizeof(uint64_t));
auto *hdr = (blockheader *)mem;
static std::string name = get_raw_symbol_name(cat) + "{}";
static blockheader hdr_val
static thread_local std::string name = get_raw_symbol_name(cat) + "{}";
static thread_local blockheader hdr_val
= get_block_header_for_symbol(get_tag_for_symbol_name(name.c_str()));
*hdr = hdr_val;
auto *offset = (uint64_t *)(hdr + 1);
Expand Down
8 changes: 8 additions & 0 deletions unittests/runtime-collections/lists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,15 @@ block D1 = {{1}};
block *DUMMY1 = &D1;
}

#ifdef __MACH__
//
// thread_local disabled for Apple
//
bool gc_enabled;
#else
thread_local bool gc_enabled;
#endif

// Unit-test definition of the GC threshold query: always returns SIZE_MAX.
// NOTE(review): presumably chosen so that the threshold is never reached
// while these tests run - confirm against the code that compares against it.
size_t get_gc_threshold() {
  constexpr size_t no_limit = SIZE_MAX;
  return no_limit;
}
Expand Down
Loading