You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

419 lines
8.9 KiB
C

/*
* BFVM by matthilde
*
* bfvm.c
*
* BFVM is an optimized brainfuck interpreter using a VM.
* This is a modified version featuring a JIT.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#define TAPESZ 8192
#define curvalue (b->tape[b->dp])
#define MAX(x,y) (x > y ? x : y)
#define MIN(x,y) (x > y ? y : x)
typedef unsigned char byte;
typedef unsigned short word;
typedef unsigned int tapeptr;
typedef enum {
ADD,
SUB,
INPUT,
OUTPUT,
NEXT,
PREV,
JZ, // Jump if Zero
JNZ, // Jump if not Zero
INSLEN, // Not an instruction, just to define the end of the struct
HALT // Special instruction
} Instructions;
typedef struct {
Instructions inst;
unsigned int operand;
} BFInstruction;
//// JIT STUFF ////
typedef struct JITModel {
byte *program;
size_t len;
size_t arglen;
} JITModel;
JITModel bootstrap = {
"\x48\x31\xc0" // xor rax, rax
"\x48\x31\xff" // xor rdi, rdi
"\x48\x31\xf6" // xor rsi, rsi
"\x48\x31\xd2" // xor rdx, rdx
"\x48\xba", // mov rdx, ...
14, 8};
JITModel endofprgm = {
"\xb8\x3c\0\0\0"
"\x48\x31\xff"
"\x0f\x05", 10, 0};
JITModel jitops[] = {
{"\x80\x02", 2, 1}, // add byte[rdx], 0x??
{"\x80\x2a", 2, 1}, // sub byte[rdx], 0x??
{
"\x52" // push rdx
"\xb8\0\0\0\0" // mov eax, 0
"\x48\x31\xff" // xor rdi, rdi
"\x48\x89\xd6" // mov rsi, rdx
"\xba\x01\0\0\0" // mov edx, 1
"\x0f\x05" // syscall
"\x5a"
, 20, 0},
{
"\x52" // push rdx
"\xb8\x01\0\0\0" // mov eax, 1
"\xbf\x01\0\0\0" // mov edi, 1
"\x48\x89\xd6" // mov rsi, rdx
"\xba\x01\0\0\0" // mov edx, 1
"\x0f\x05" // syscall
"\x5a" // pop rdx
, 22, 0},
{"\x48\x81\xc2", 3, 4}, // add rdx, 0x????????
{"\x48\x81\xea", 3, 4}, // sub rdx, 0x????????
{
"\x80\x3a\0" // cmp byte [rdx], 0
"\x0f\x84", 5, 4}, // jz near 0x????????
{ "\x80\x3a\0" // cmp byte [rdx], 0
"\x0f\x85", 5, 4}, // jnz near 0x????????
{"", 0, 0},
};
///////////////////
typedef BFInstruction Program[0x10000];
typedef struct {
byte tape[TAPESZ];
Program program;
tapeptr ip;
tapeptr dp;
bool verbose;
} Brainfuck;
typedef unsigned int uint;
typedef void (*instruction)(Brainfuck*,uint);
// INSTRUCTIONS
void i_add(Brainfuck* b, uint o){ curvalue += o; };
void i_sub(Brainfuck* b, uint o){ curvalue -= o; };
void i_input(Brainfuck* b, uint o){ curvalue = (byte)getchar(); };
void i_output(Brainfuck* b, uint o){ putchar((char)curvalue); };
void i_next(Brainfuck* b, uint o){ b->dp += o; };
void i_prev(Brainfuck* b, uint o){ b->dp -= o; };
void i_jz(Brainfuck* b, uint o){ if (curvalue == 0) b->ip = o; };
void i_jnz(Brainfuck* b, uint o){ if (curvalue) b->ip = o; };
// Instruction bindings
const char* inst_chars = "+-,.><[]";
instruction inst_funcs[INSLEN] = {
i_add, i_sub, i_input, i_output, i_next, i_prev, i_jz, i_jnz
};
// Find the matching bytecode of the instruction.
int find_instruction(char c) {
for (int i = 0; inst_chars[i]; ++i)
if (inst_chars[i] == c)
return i;
return -1;
}
// Verbose logs
void verbose(Brainfuck* b, const char* fmt, ...) {
if (b->verbose) {
va_list args;
va_start(args, fmt);
fprintf(stderr, "[VERBOSE] ");
vfprintf(stderr, fmt, args);
fputc('\n', stderr);
va_end(args);
}
}
// Dying functions
static void pdie(const char* s) {
perror(s);
exit(1);
}
static void ddie(unsigned int linenum,
unsigned int charnum,
const char* s) {
fprintf(stderr, "Error at L%d, C%d\n%s\n", linenum, charnum, s);
exit(1);
}
// Reverse instruction
Instructions reverse_inst[INSLEN] = {
SUB, ADD,
OUTPUT, INPUT,
PREV, NEXT,
JNZ, JZ
};
// Assembler
void assemble(Brainfuck* b, FILE* f) {
// A small thing that speeds up everything
BFInstruction* prgm = b->program;
// Some useful variables
int c;
Instructions i;
unsigned int linenum = 0; // For debugging logs
unsigned int charnum = 0;
// Assembler registers for optimization
Instructions mode = -1;
unsigned int acc = 0;
// Loop stack
unsigned int loopstack[512];
unsigned int sp = 0;
// Location pointer
unsigned int lp = 0;
verbose(b, "[ASSEMBLER] Assembling the program...");
while ((c=fgetc(f)) != EOF) {
charnum++;
if (c == '\n') {
charnum = 0;
linenum++;
}
else if ((i=find_instruction(c)) != -1) {
if (i != mode && (mode == -1 || i != reverse_inst[mode])) {
if (mode != -1) {
prgm[lp].inst = mode;
prgm[lp].operand = acc;
verbose(b, "Assembled! [%c, %d]",
inst_chars[mode], acc);
lp++;
}
mode = i;
acc = 0;
}
if (i == JZ) {
verbose(b, "Pushing location into loopstack...");
loopstack[sp++] = lp;
if (sp == 512)
ddie(linenum, charnum, "Loopstack overflow");
prgm[lp++].inst = i;
mode = -1;
}
else if (i == JNZ) {
if (sp == 0)
ddie(linenum, charnum, "Mismatching brackets");
acc = loopstack[--sp];
verbose(b, "Popping from stack... %ud", acc);
prgm[acc].operand = lp-1;
prgm[lp].inst = i;
prgm[lp++].operand = acc;
verbose(b, "JUMP INSTRUCTION");
verbose(b, "'[' - %d", lp);
verbose(b, "']' - %d", acc);
acc = 0;
mode = -1;
}
else if (i == OUTPUT || i == INPUT) {
verbose(b, "Assembled! %c", c);
prgm[lp++].inst = i;
mode = -1;
}
else if (i == mode || reverse_inst[i] == mode) {
if (i == mode)
acc++;
else if (acc == 0) {
mode = i;
acc = 1;
}
else
acc--;
}
}
}
if (mode != -1 && acc) {
prgm[lp].inst = i;
prgm[lp].operand = acc;
lp++;
}
prgm[lp].inst = HALT;
if (sp != 0)
ddie(linenum, charnum, "Expected bracket but got EOF");
}
// Compile
static size_t calculate_compilation_malloc_size(BFInstruction* p) {
size_t result = bootstrap.len + bootstrap.arglen;
JITModel *op;
for (size_t i = 0; p[i].inst != HALT; ++i) {
op = &jitops[p[i].inst];
result += op->len + op->arglen;
}
result += endofprgm.len;
return result;
}
static size_t calculate_jump_size(BFInstruction* p, size_t a, size_t b) {
int sign = 1;
if (a > b) {
a ^= b;
b ^= a;
a ^= b;
sign = -1;
} else b += 2;
size_t s = 0;
JITModel* m;
for (; a < b; ++a) {
m = &jitops[p[a].inst];
s += m->len + m->arglen;
}
return s * sign;
}
byte* compile_bytecode(BFInstruction* p, size_t* s, byte* tape) {
size_t psize = calculate_compilation_malloc_size(p);
*s = psize;
byte* bin = (byte*)malloc(sizeof(byte) * psize);
if (!bin) {
*s = -1;
return NULL;
}
size_t lp = bootstrap.len; // location pointer
JITModel *op;
ssize_t tmp;
// Copy bootstrap code
memcpy(bin, bootstrap.program, bootstrap.len);
memcpy(bin + lp, (char*)&tape, 8);
lp += 8;
for (size_t i = 0; p[i].inst != HALT; ++i) {
op = &jitops[p[i].inst];
memcpy(bin + lp, op->program, op->len);
lp += op->len;
// argument
switch (p[i].inst) {
case ADD: case SUB:
*(bin + lp) = p[i].operand & 0xff;
break;
case NEXT: case PREV:
memcpy(bin + lp, (char*)&(p[i].operand), 4);
break;
case JZ: case JNZ:
tmp = calculate_jump_size(p, i, p[i].operand) - (
op->len + op->arglen);
memcpy(bin + lp, (char*)&tmp, 8);
break;
}
lp += op->arglen;
}
memcpy(bin + lp, endofprgm.program, endofprgm.len);
return bin;
}
// Run it!!!
void run_amd64_code(byte* code, size_t s) {
byte* mmap_code = mmap(NULL, s,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANON | MAP_PRIVATE, 0, 0);
memcpy(mmap_code, code, s);
void (*runcode)(void) = (void (*)(void))mmap_code;
runcode();
}
// Execute the code
void execute_instruction(Brainfuck* b) {
BFInstruction i = b->program[b->ip];
inst_funcs[i.inst](b, i.operand);
++(b->ip);
}
void execute_code(Brainfuck* b) {
b->ip = 0;
b->dp = 0;
while (b->program[b->ip].inst != HALT)
execute_instruction(b);
}
// Main program
int main(int argc, char **argv) {
if (argc != 2 && argc != 3) {
display_usage:
puts("USAGE: bfvm [-vjc] FILENAME");
return 1;
}
Brainfuck bf;
enum {
INTERPRET_MODE,
COMPILE_MODE,
JIT_MODE
} mode = INTERPRET_MODE;
bf.verbose = false;
if (argc == 3 && argv[1][0] == '-')
for (int i = 1; argv[1][i]; ++i) {
switch (argv[1][i]) {
case 'v': bf.verbose = true; break;
case 'j': mode = JIT_MODE; break;
case 'c': mode = COMPILE_MODE; break;
default: goto display_usage; break;
}
}
verbose(&bf, "WARNING! Verbose logs has been enabled!");
if (mode == JIT_MODE) verbose(&bf, "Jit mode has been enabled!");
bf.ip = 0;
bf.dp = 0;
FILE* f = fopen(argv[argc-1], "r");
if (!f)
pdie("bfvm: fopen");
verbose(&bf, "[1] Assembling code...");
assemble(&bf, f);
if (mode != INTERPRET_MODE) {
verbose(&bf, "[2] Compiling bytecode...");
size_t s;
byte* program = compile_bytecode(bf.program, &s, bf.tape);
if (!program) pdie("bfvm: malloc");
if (mode == COMPILE_MODE)
fwrite(program, 1, s, stdout);
else {
verbose(&bf, "[3] Running compiled code...");
run_amd64_code(program, s);
}
free(program);
} else {
verbose(&bf, "[2] Executing code...");
execute_code(&bf);
}
return 0;
}