You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
419 lines
8.9 KiB
C
419 lines
8.9 KiB
C
/*
|
|
* BFVM by matthilde
|
|
*
|
|
* bfvm.c
|
|
*
|
|
* BFVM is an optimized brainfuck interpreter using a VM.
|
|
* This is a modified version featuring a JIT.
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdarg.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <sys/mman.h>
|
|
|
|
#define TAPESZ 8192
|
|
#define curvalue (b->tape[b->dp])
|
|
|
|
#define MAX(x,y) (x > y ? x : y)
|
|
#define MIN(x,y) (x > y ? y : x)
|
|
|
|
typedef unsigned char byte;
|
|
typedef unsigned short word;
|
|
typedef unsigned int tapeptr;
|
|
|
|
typedef enum {
|
|
ADD,
|
|
SUB,
|
|
INPUT,
|
|
OUTPUT,
|
|
NEXT,
|
|
PREV,
|
|
JZ, // Jump if Zero
|
|
JNZ, // Jump if not Zero
|
|
INSLEN, // Not an instruction, just to define the end of the struct
|
|
HALT // Special instruction
|
|
} Instructions;
|
|
|
|
typedef struct {
|
|
Instructions inst;
|
|
unsigned int operand;
|
|
} BFInstruction;
|
|
|
|
//// JIT STUFF ////
|
|
typedef struct JITModel {
|
|
byte *program;
|
|
size_t len;
|
|
size_t arglen;
|
|
} JITModel;
|
|
|
|
JITModel bootstrap = {
|
|
"\x48\x31\xc0" // xor rax, rax
|
|
"\x48\x31\xff" // xor rdi, rdi
|
|
"\x48\x31\xf6" // xor rsi, rsi
|
|
"\x48\x31\xd2" // xor rdx, rdx
|
|
"\x48\xba", // mov rdx, ...
|
|
14, 8};
|
|
JITModel endofprgm = {
|
|
"\xb8\x3c\0\0\0"
|
|
"\x48\x31\xff"
|
|
"\x0f\x05", 10, 0};
|
|
|
|
JITModel jitops[] = {
|
|
{"\x80\x02", 2, 1}, // add byte[rdx], 0x??
|
|
{"\x80\x2a", 2, 1}, // sub byte[rdx], 0x??
|
|
{
|
|
"\x52" // push rdx
|
|
"\xb8\0\0\0\0" // mov eax, 0
|
|
"\x48\x31\xff" // xor rdi, rdi
|
|
"\x48\x89\xd6" // mov rsi, rdx
|
|
"\xba\x01\0\0\0" // mov edx, 1
|
|
"\x0f\x05" // syscall
|
|
"\x5a"
|
|
, 20, 0},
|
|
{
|
|
"\x52" // push rdx
|
|
"\xb8\x01\0\0\0" // mov eax, 1
|
|
"\xbf\x01\0\0\0" // mov edi, 1
|
|
"\x48\x89\xd6" // mov rsi, rdx
|
|
"\xba\x01\0\0\0" // mov edx, 1
|
|
"\x0f\x05" // syscall
|
|
"\x5a" // pop rdx
|
|
, 22, 0},
|
|
{"\x48\x81\xc2", 3, 4}, // add rdx, 0x????????
|
|
{"\x48\x81\xea", 3, 4}, // sub rdx, 0x????????
|
|
{
|
|
"\x80\x3a\0" // cmp byte [rdx], 0
|
|
"\x0f\x84", 5, 4}, // jz near 0x????????
|
|
{ "\x80\x3a\0" // cmp byte [rdx], 0
|
|
"\x0f\x85", 5, 4}, // jnz near 0x????????
|
|
{"", 0, 0},
|
|
};
|
|
///////////////////
|
|
|
|
typedef BFInstruction Program[0x10000];
|
|
|
|
typedef struct {
|
|
byte tape[TAPESZ];
|
|
Program program;
|
|
|
|
tapeptr ip;
|
|
tapeptr dp;
|
|
|
|
bool verbose;
|
|
} Brainfuck;
|
|
|
|
typedef unsigned int uint;
|
|
typedef void (*instruction)(Brainfuck*,uint);
|
|
|
|
// INSTRUCTIONS
|
|
void i_add(Brainfuck* b, uint o){ curvalue += o; };
|
|
void i_sub(Brainfuck* b, uint o){ curvalue -= o; };
|
|
void i_input(Brainfuck* b, uint o){ curvalue = (byte)getchar(); };
|
|
void i_output(Brainfuck* b, uint o){ putchar((char)curvalue); };
|
|
void i_next(Brainfuck* b, uint o){ b->dp += o; };
|
|
void i_prev(Brainfuck* b, uint o){ b->dp -= o; };
|
|
void i_jz(Brainfuck* b, uint o){ if (curvalue == 0) b->ip = o; };
|
|
void i_jnz(Brainfuck* b, uint o){ if (curvalue) b->ip = o; };
|
|
|
|
// Instruction bindings
|
|
const char* inst_chars = "+-,.><[]";
|
|
instruction inst_funcs[INSLEN] = {
|
|
i_add, i_sub, i_input, i_output, i_next, i_prev, i_jz, i_jnz
|
|
};
|
|
|
|
// Find the matching bytecode of the instruction.
|
|
int find_instruction(char c) {
|
|
for (int i = 0; inst_chars[i]; ++i)
|
|
if (inst_chars[i] == c)
|
|
return i;
|
|
return -1;
|
|
}
|
|
|
|
// Verbose logs
|
|
void verbose(Brainfuck* b, const char* fmt, ...) {
|
|
if (b->verbose) {
|
|
va_list args;
|
|
va_start(args, fmt);
|
|
|
|
fprintf(stderr, "[VERBOSE] ");
|
|
vfprintf(stderr, fmt, args);
|
|
fputc('\n', stderr);
|
|
|
|
va_end(args);
|
|
}
|
|
}
|
|
|
|
// Dying functions
|
|
static void pdie(const char* s) {
|
|
perror(s);
|
|
exit(1);
|
|
}
|
|
|
|
static void ddie(unsigned int linenum,
|
|
unsigned int charnum,
|
|
const char* s) {
|
|
fprintf(stderr, "Error at L%d, C%d\n%s\n", linenum, charnum, s);
|
|
exit(1);
|
|
}
|
|
|
|
// Reverse instruction
|
|
Instructions reverse_inst[INSLEN] = {
|
|
SUB, ADD,
|
|
OUTPUT, INPUT,
|
|
PREV, NEXT,
|
|
JNZ, JZ
|
|
};
|
|
|
|
// Assembler
|
|
void assemble(Brainfuck* b, FILE* f) {
|
|
// A small thing that speeds up everything
|
|
BFInstruction* prgm = b->program;
|
|
|
|
// Some useful variables
|
|
int c;
|
|
Instructions i;
|
|
unsigned int linenum = 0; // For debugging logs
|
|
unsigned int charnum = 0;
|
|
|
|
// Assembler registers for optimization
|
|
Instructions mode = -1;
|
|
unsigned int acc = 0;
|
|
|
|
// Loop stack
|
|
unsigned int loopstack[512];
|
|
unsigned int sp = 0;
|
|
|
|
// Location pointer
|
|
unsigned int lp = 0;
|
|
|
|
verbose(b, "[ASSEMBLER] Assembling the program...");
|
|
while ((c=fgetc(f)) != EOF) {
|
|
charnum++;
|
|
if (c == '\n') {
|
|
charnum = 0;
|
|
linenum++;
|
|
}
|
|
else if ((i=find_instruction(c)) != -1) {
|
|
if (i != mode && (mode == -1 || i != reverse_inst[mode])) {
|
|
if (mode != -1) {
|
|
prgm[lp].inst = mode;
|
|
prgm[lp].operand = acc;
|
|
verbose(b, "Assembled! [%c, %d]",
|
|
inst_chars[mode], acc);
|
|
|
|
lp++;
|
|
}
|
|
mode = i;
|
|
acc = 0;
|
|
|
|
}
|
|
|
|
if (i == JZ) {
|
|
verbose(b, "Pushing location into loopstack...");
|
|
loopstack[sp++] = lp;
|
|
if (sp == 512)
|
|
ddie(linenum, charnum, "Loopstack overflow");
|
|
|
|
prgm[lp++].inst = i;
|
|
mode = -1;
|
|
}
|
|
else if (i == JNZ) {
|
|
if (sp == 0)
|
|
ddie(linenum, charnum, "Mismatching brackets");
|
|
|
|
acc = loopstack[--sp];
|
|
verbose(b, "Popping from stack... %ud", acc);
|
|
prgm[acc].operand = lp-1;
|
|
prgm[lp].inst = i;
|
|
prgm[lp++].operand = acc;
|
|
|
|
verbose(b, "JUMP INSTRUCTION");
|
|
verbose(b, "'[' - %d", lp);
|
|
verbose(b, "']' - %d", acc);
|
|
acc = 0;
|
|
mode = -1;
|
|
}
|
|
else if (i == OUTPUT || i == INPUT) {
|
|
verbose(b, "Assembled! %c", c);
|
|
prgm[lp++].inst = i;
|
|
mode = -1;
|
|
}
|
|
else if (i == mode || reverse_inst[i] == mode) {
|
|
if (i == mode)
|
|
acc++;
|
|
else if (acc == 0) {
|
|
mode = i;
|
|
acc = 1;
|
|
}
|
|
else
|
|
acc--;
|
|
|
|
}
|
|
}
|
|
}
|
|
if (mode != -1 && acc) {
|
|
prgm[lp].inst = i;
|
|
prgm[lp].operand = acc;
|
|
lp++;
|
|
}
|
|
prgm[lp].inst = HALT;
|
|
|
|
if (sp != 0)
|
|
ddie(linenum, charnum, "Expected bracket but got EOF");
|
|
}
|
|
|
|
// Compile
|
|
static size_t calculate_compilation_malloc_size(BFInstruction* p) {
|
|
size_t result = bootstrap.len + bootstrap.arglen;
|
|
JITModel *op;
|
|
for (size_t i = 0; p[i].inst != HALT; ++i) {
|
|
op = &jitops[p[i].inst];
|
|
result += op->len + op->arglen;
|
|
}
|
|
result += endofprgm.len;
|
|
|
|
return result;
|
|
}
|
|
static size_t calculate_jump_size(BFInstruction* p, size_t a, size_t b) {
|
|
int sign = 1;
|
|
if (a > b) {
|
|
a ^= b;
|
|
b ^= a;
|
|
a ^= b;
|
|
sign = -1;
|
|
} else b += 2;
|
|
size_t s = 0;
|
|
JITModel* m;
|
|
for (; a < b; ++a) {
|
|
m = &jitops[p[a].inst];
|
|
s += m->len + m->arglen;
|
|
}
|
|
return s * sign;
|
|
}
|
|
byte* compile_bytecode(BFInstruction* p, size_t* s, byte* tape) {
|
|
size_t psize = calculate_compilation_malloc_size(p);
|
|
*s = psize;
|
|
byte* bin = (byte*)malloc(sizeof(byte) * psize);
|
|
if (!bin) {
|
|
*s = -1;
|
|
return NULL;
|
|
}
|
|
size_t lp = bootstrap.len; // location pointer
|
|
JITModel *op;
|
|
ssize_t tmp;
|
|
|
|
// Copy bootstrap code
|
|
memcpy(bin, bootstrap.program, bootstrap.len);
|
|
memcpy(bin + lp, (char*)&tape, 8);
|
|
lp += 8;
|
|
|
|
for (size_t i = 0; p[i].inst != HALT; ++i) {
|
|
op = &jitops[p[i].inst];
|
|
memcpy(bin + lp, op->program, op->len);
|
|
lp += op->len;
|
|
|
|
// argument
|
|
switch (p[i].inst) {
|
|
case ADD: case SUB:
|
|
*(bin + lp) = p[i].operand & 0xff;
|
|
break;
|
|
case NEXT: case PREV:
|
|
memcpy(bin + lp, (char*)&(p[i].operand), 4);
|
|
break;
|
|
case JZ: case JNZ:
|
|
tmp = calculate_jump_size(p, i, p[i].operand) - (
|
|
op->len + op->arglen);
|
|
memcpy(bin + lp, (char*)&tmp, 8);
|
|
break;
|
|
}
|
|
lp += op->arglen;
|
|
}
|
|
memcpy(bin + lp, endofprgm.program, endofprgm.len);
|
|
|
|
return bin;
|
|
}
|
|
// Run it!!!
|
|
void run_amd64_code(byte* code, size_t s) {
|
|
byte* mmap_code = mmap(NULL, s,
|
|
PROT_READ | PROT_WRITE | PROT_EXEC,
|
|
MAP_ANON | MAP_PRIVATE, 0, 0);
|
|
memcpy(mmap_code, code, s);
|
|
void (*runcode)(void) = (void (*)(void))mmap_code;
|
|
runcode();
|
|
}
|
|
|
|
// Execute the code
|
|
void execute_instruction(Brainfuck* b) {
|
|
BFInstruction i = b->program[b->ip];
|
|
inst_funcs[i.inst](b, i.operand);
|
|
++(b->ip);
|
|
}
|
|
|
|
void execute_code(Brainfuck* b) {
|
|
b->ip = 0;
|
|
b->dp = 0;
|
|
|
|
while (b->program[b->ip].inst != HALT)
|
|
execute_instruction(b);
|
|
}
|
|
|
|
// Main program
|
|
int main(int argc, char **argv) {
|
|
if (argc != 2 && argc != 3) {
|
|
display_usage:
|
|
puts("USAGE: bfvm [-vjc] FILENAME");
|
|
return 1;
|
|
}
|
|
|
|
Brainfuck bf;
|
|
enum {
|
|
INTERPRET_MODE,
|
|
COMPILE_MODE,
|
|
JIT_MODE
|
|
} mode = INTERPRET_MODE;
|
|
|
|
bf.verbose = false;
|
|
if (argc == 3 && argv[1][0] == '-')
|
|
for (int i = 1; argv[1][i]; ++i) {
|
|
switch (argv[1][i]) {
|
|
case 'v': bf.verbose = true; break;
|
|
case 'j': mode = JIT_MODE; break;
|
|
case 'c': mode = COMPILE_MODE; break;
|
|
default: goto display_usage; break;
|
|
}
|
|
}
|
|
verbose(&bf, "WARNING! Verbose logs has been enabled!");
|
|
if (mode == JIT_MODE) verbose(&bf, "Jit mode has been enabled!");
|
|
|
|
bf.ip = 0;
|
|
bf.dp = 0;
|
|
|
|
FILE* f = fopen(argv[argc-1], "r");
|
|
if (!f)
|
|
pdie("bfvm: fopen");
|
|
|
|
verbose(&bf, "[1] Assembling code...");
|
|
assemble(&bf, f);
|
|
if (mode != INTERPRET_MODE) {
|
|
verbose(&bf, "[2] Compiling bytecode...");
|
|
size_t s;
|
|
byte* program = compile_bytecode(bf.program, &s, bf.tape);
|
|
if (!program) pdie("bfvm: malloc");
|
|
|
|
if (mode == COMPILE_MODE)
|
|
fwrite(program, 1, s, stdout);
|
|
else {
|
|
verbose(&bf, "[3] Running compiled code...");
|
|
run_amd64_code(program, s);
|
|
}
|
|
free(program);
|
|
} else {
|
|
verbose(&bf, "[2] Executing code...");
|
|
execute_code(&bf);
|
|
}
|
|
|
|
return 0;
|
|
}
|