FVM

FVM is an embedded programming system that utilizes a language with a C-style syntax. Its implementation consists of two parts: A compiler that compiles source code to byte code, and a virtual machine that links compiled code with externals at runtime (functions, classes, etc) and executes its byte code.

Example

Sum the contents of a text file that contains integers:

#define IO_READ 1
#define IO_WRITE 2

int sum_file(string filename)
{
	File file = new File();
	if (!file->open(filename, IO_READ))
	{
		print("Error: Unable to open file");
		delete file;
		return -1;
	}

	int sum = 0;
	while (!file->eof())
	{
		sum = sum + file->read_int();
	}
	
	file->close();
	delete file;
	return sum;
}

print("The sum of the file's contents is %d.\n", sum_file("inputfile.txt"));

				// Generated assembly:
// Line 4: int sum_file(string filename)
// Function int sum_file
0x0	jmp                 0x42
// Arg: string filename
// Line 6: File file = new File();
0x2	pushc               0x10000
// Call ctor File
0x4	call_linked_cc      0x10000
# Pop to File file with id 65537 (0x10001)
0x6	popv                0x10001
// Line 7: if (!file->open(filename, 1))
// Call function open in File
// Arg index 0 = filename
0x8	pushv               0x10000
// Arg index 1 = 1
# Push constant int = 1 with id 131072 (0x20000)
0xA	pushc               0x20000
0xC	load_v_addr         0x10001
0xE	call_lcfunc         0x0
0x10	not                 
0x11	jz                  0x20
// Line 9: print("Error: Unable to open file");
// Call linked function print with id 1
# Push constant string = "Error: Unable to open file" with id 131073 (0x20001)
0x13	pushc               0x20001
0x15	call_lfunc          0x6
0x17	pop                 
// Line 10: delete file;
0x18	pushv               0x10001
0x1A	call_linked_dc      
// Line 11: return -1;
# Push constant int = 1 with id 131072 (0x20000)
0x1B	setv_c              0x10003 0x6
# Reduced and simplified constant NEG 1 to pushc -1 id = 0x6
# Reduced PUSHC, POPV
0x1E	jmp                 0x40
// Line 12: }
// Line 14: int sum = 0;
# Push constant int = 0 with id 65537 (0x10001)
0x20	setv_c              0x10002 0x10001
# Pop to int sum with id 65538 (0x10002)
# Reduced PUSHC, POPV
// Line 15: while (!file->eof())
// condition !file->eof()
// Call function eof in File
0x23	load_v_addr         0x10001
0x25	call_lcfunc         0x2
0x27	not                 
0x28	jz                  0x35
// Line 17: sum = sum + file->read_int();
0x2A	pushv               0x10002
// Call function read_int in File
0x2C	load_v_addr         0x10001
0x2E	call_lcfunc         0x3
0x30	add                 
# Pop to int sum with id 65538 (0x10002)
0x31	popv                0x10002
// Line 18: }
0x33	jmp                 0x23
// Line 20: file->close();
// Call function close in File
0x35	load_v_addr         0x10001
0x37	call_lcfunc         0x1
0x39	pop                 
// Line 21: delete file;
0x3A	pushv               0x10001
0x3C	call_linked_dc      
// Line 22: return sum;
0x3D	setv_v              0x10003 0x10002
# Reduced PUSHV, POPV
// Line 23: }
0x40	ret_func            0x10003
// End function int sum_file
// Line 25: print("The sum of the file's contents is %d.\n", sum_file("inputfile.txt"));
// Call linked function print with id 1
# Push constant string = "The sum of the file's contents is %d.
" with id 0 (0x0)
0x42	pushc               0x0
// Call function sum_file at loc 0
0x44	frame_create        0x1
// Arg 0x10000 filename = "inputfile.txt"
# Push constant string = "inputfile.txt" with id 1 (0x1)
0x46	setv_c              0x10000 0x1
# Reduced PUSHC, POPV
0x49	call_func           0x2
0x4B	frame_destroy       0x1
0x4D	pushc               0x2
0x4F	call_lfunc          0x1
0x51	pop

Compiler

The compiler runs a preprocessor on each input file, then does a two-pass compilation of each translation unit using a recursive descent parser. The first pass identifies signatures and types of functions, structures, classes and variables. The second pass is code generation. Next, the following optimizations are applied in multiple passes:

Constant folding and propagation (algebraic simplification)

For example, the expression "2 * ((13 - 4) * 142 + 3 * i + (133 * 23 * (4 - 2) - 3 * (20 + 40)))" is simplified to "a * i + b" where a and b are constants. Similar folding optimizations are applied to binary, comparison and other operators acting on constants.

// Line 6: res = 2 * ((13 - 4) * 142 + 3 * i + (133 * 23 * (4 - 2) - 3 * (20 + 40)));
# Push constant int = 3 with id 65537 (0x10001)
0xF	pushc               0x10001
0x11	pushv               0x10000
0x13	mul                 
# Push constant int = 2 with id 65538 (0x10002)
0x14	pushc               0xB
# Push constant int = 13 with id 65539 (0x10003)
# Push constant int = 4 with id 65540 (0x10004)
# Reduced and simplified constant SUB 4, 13 to pushc 9 id = 0x2
# Push constant int = 142 with id 65541 (0x10005)
# Reduced and simplified constant MUL 142, 9 to pushc 1278 id = 0x3
# Push constant int = 133 with id 65542 (0x10006)
# Push constant int = 23 with id 65543 (0x10007)
# Reduced and simplified constant MUL 23, 133 to pushc 3059 id = 0x4
# Push constant int = 4 with id 65540 (0x10004)
# Push constant int = 2 with id 65538 (0x10002)
# Reduced and simplified constant SUB 2, 4 to pushc 2 id = 0x5
# Reduced and simplified constant MUL 2, 3059 to pushc 6118 id = 0x6
# Push constant int = 3 with id 65537 (0x10001)
# Push constant int = 20 with id 65544 (0x10008)
# Push constant int = 40 with id 65545 (0x10009)
# Reduced and simplified constant ADD 40, 20 to pushc 60 id = 0x7
# Reduced and simplified constant MUL 60, 3 to pushc 180 id = 0x8
# Reduced and simplified constant SUB 180, 6118 to pushc 5938 id = 0x9
# Reduced and simplified constant ADD 5938, 1278 to pushc 7216 id = 0xA
# Reduced and simplified constant MUL 7216, 2 to pushc 14432 id = 0xB
0x16	add                 
# Pop to int res with id 1 (0x1)
0x17	popv                0x1

Stack frame optimizations

Consider the expression "func(x) + func(y)". An obvious peephole optimization is to allow the second call to func to reuse the stack frame that was created for the first call for func, only updating the parameter passed. A more complex example involves live variable analysis. Consider the quicksort routine:

void qsort(int l, int r)
{
	int i = l, j = r;
	int v = A[(l + r) / 2];
	while (i <= j)
	{
		while (A[i] < v) { i++; }
		while (A[j] > v) { j--; }
		if (i > j) { break; }

		int v = A[i]; A[i] = A[j]; A[j] = v;
		i++;
		j--;
	}
	if (l < j) { qsort(l, j); }
	if (r > i) { qsort(i, r); }
}

At the first recursive call to qsort, the variables i and r are still "alive" (as determined by data flow analysis), so their values are saved. At the second recursive call to qsort, no variables need to be saved since they are no longer live at the point of the function call. FVM will re-use the memory locations occupied by l, j, i, r and v. I do not pass the array in the function as, currently, all variables are passed by value, and copying a very large array repeatedly is not a good idea.

The last optimization consists of replacing redundant instructions, such as a jmp to a location immediately following itself, or instructions for which a mnemonic exists, such as replacing pushv(x), incr, popv(x) with incrv(x).

The final list of instructions is assembled to byte code.

Linker (C++)

The following is an example of linking functions, variables and classes to FVM.

class File
{
public:
	bool open(const char* filename, int mode) { ifs.open(filename); return ifs.is_open(); }
	bool eof() { return ifs.eof(); }
	void close() { ifs.close(); }

	int read_int() { int v; ifs >> v; return v; }

private:
	std::ifstream ifs;
};

class TestClass
{
public:
	void test() { std::cout << "abc" << std::endl; }
	void print_message(int n) { std::cout << "Test class, test message = " << n << std::endl; }
	int test_value() { return 234; }

	TestClass(int v) : val(v) { std::cout << "TestClass ctor" << std::endl; }
	~TestClass() { std::cout << "TestClass dtor" << std::endl; }

	int val;
};

TestClass* create_test_class(int value) { return new TestClass(value); }
void print_test_class(TestClass* t) { std::cout << t->val << std::endl; }

...

Compiler c;
c.LinkFunction(new LinkedFunction1<float, float>(sinf, "sin"));
c.LinkFunction(new LinkedFunction1<float, float>(cosf, "cos"));

LinkedClass<TestClass>* TestClass_linked = new LinkedClass1<TestClass, int>("TestClass");
TestClass_linked->LinkClassFunction(new LinkedClassFunction0<TestClass, void>(&TestClass::test, "test"));
TestClass_linked->LinkClassFunction(new LinkedClassFunction0<TestClass, int>(&TestClass::test_value, "test_value"));
TestClass_linked->LinkClassFunction(new LinkedClassFunction1<TestClass, void, int>(&TestClass::print_message, "print_message"));
c.LinkClass(TestClass_linked);
c.LinkFunction(new LinkedFunction1<TestClass*, int>(create_test_class, "create_test_class"));
c.LinkFunction(new LinkedFunction1<void, TestClass*>(print_test_class, "print_test_class"));

LinkedClass<File>* FileClass_linked = new LinkedClass0<File>("File");
FileClass_linked->LinkClassFunction(new LinkedClassFunction2<FileClass, bool, const char*, int>(&FileClass::open, "open"));
FileClass_linked->LinkClassFunction(new LinkedClassFunction0<FileClass, void>(&File::close, "close"));
FileClass_linked->LinkClassFunction(new LinkedClassFunction0<FileClass, bool>(&File::eof, "eof"));
FileClass_linked->LinkClassFunction(new LinkedClassFunction0<FileClass, int>(&File::read_int, "read_int"));
c.LinkClass(FileClass_linked);

Program* prog = c.Compile("test.txt");
prog->GetVariable("from_prog")->set_int(123);
vm.Run(prog, prog->namespaces, false);
	
float a =(float)*prog->GetVariable("a");
float b = (float)*prog->GetVariable("b");
std::vector<int> bc = Variable::ArrayToType<int>(*prog->GetVariable("bc"));
std::vector<const char*> test_strings = Variable::ArrayToType<const char*>(*prog->GetVariable("s"));
std::vector<float> arr = Variable::ArrayToType<float>(*prog->GetVariable("arr"));

delete prog;

C++ integration allows C++ code to interact with the VM by linking functions and classes, and by passing or retrieving variables. Cast operators are provided for primitive datatypes, while explicit methods exist to quickly convert arrays and structures from the VM's memory to and from types that are compatible with the C++ program. This is especially useful for function calls or calling class methods from within byte code running in the VM, or when passing complex structures to the VM. Structures (classes without methods) can be defined from within source code compiled in FVM and can be passed to and from C++ code through a custom function that uses FVM's Variable data type as a parameter to interface members.

Benchmarks with Lua and Python

The height of each bar represents the time taken to run a script. Lower is better.

Optimization

A loop with an expression that can be partly folded. Since this can be folded to a trivial expression, FVM's advantage increases as the complexity of the function increases.

int num = 10000000, res = 0;

for (int i = 0; i < num; i++)
{
	res = 2 * ((13 - 4) * 142 + 3 * i + (133 * 23 * (4 - 2) - 3 * (20 + 40)));
}

Show Lua code

Hide Lua code

N = 10000000 - 1
res = 0
for i = 0, N do
	res = 2 * ((13 - 4) * 142 + 3 * i + (133 * 23 * (4 - 3) - 3 * (20 + 40)))
end

Show Python code

Hide Python code

num = 10000000 - 1
res = 0
for i in range(num):
	res = 2 * ((13 - 4) * 142 + 3 * i + (133 * 23 * (4 - 3) - 3 * (20 + 40)))

Recursive functions

Calculate fib(32) of the Fibonacci sequence recursively.

int fib(int n)
{
	if (n < 2) { return 1; }
	return fib(n - 1) + fib(n - 2);
}
print_int(fib(32));

Show Lua code

Hide Lua code

function fib(n)
	if n < 2 then return 1 end
	return fib(n - 1) + fib(n - 2)
end
print(fib(32))

Show Python code

Hide Python code

def fib(n):
	if n < 2: return 1
	return fib(n - 1) + fib(n - 2)
print(fib(32))

Sorting

Sort 50,000 integers using quicksort.

int length = 50000;
int A[length];
for (int i = 0; i < length; i++)
{
	A[i] = (43 * i) % 50;
}

void qsort(int l, int r)
{
	int i = l, j = r;
	int v = A[(l + r) / 2];

	while (i <= j)
	{
		while (A[i] < v) { i++; }
		while (A[j] > v) { j--; }
		if (i > j) { break; }
		int v = A[i]; A[i] = A[j]; A[j] = v;
		i++;
		j--;
	}

	if (l < j) { qsort(l, j); }
	if (r > i) { qsort(i, r); }
}

qsort(0, length - 1);

Show Lua code

Hide Lua code

length = 50000
A = {}
function quicksort(l, r)
	i = l
	j = r
	v = A[math.floor((l + r) / 2)]
	while i <= j do
		while A[i] < v do i = i + 1 end
		while A[j] > v do j = j - 1 end
		if i > j then break end
		temp = A[i]
		A[i] = A[j]
		A[j] = v
		i = i + 1
		j = j - 1
	end
	if l < j then quicksort(l, j) end
	if r > i then quicksort(i, r) end
end

for i = 0, length do A[i] = (43 * i) % 50 end
quicksort(0, length - 1)

Show Python code

Hide Python code

num = 50000
arr = list(range(num))
for i in range(num): arr[i] = (43 * i) % 50

def qsort(l, r):
	i = 1
	j = r
	v = arr[(int)((l + r) / 2)]
	while i <= j:
		while arr[i] < v: i += 1
		while arr[j] > v: j -= 1
		if i > j: break
		v = arr[i]
		arr[i] = arr[j]
		arr[j] = v
		i += 1
		j -= 1
	if (l < j): qsort(l, j)
	if (r > i): qsort(i, j)

qsort(0, num - 1)

Tables

int num = 300;
int steps = 5000;
float arr[num];
for (int i = 0; i < num; i++)
{
	float v = 0;
	for (int j = 0; j < steps; j++)
	{
		float num = 3.14159f * j;
		num = num / steps;
		v = 2 * sin(i * v) + cos(num) - 2 * cos(v);
	}
	if (v < 0) { v = -v; }
	arr[i] = v;
}

Show Lua code

Hide Lua code

num = 300
steps = 5000
arr = {}
for i = 0, num do
	v = 0.0
	for j = 0, steps do
		num = 3.14159 * j
		num = num / steps
		v = 2 * math.sin(i * v) + math.cos(num) - 2 * math.cos(v)
		if v < 0 then v = -v end
		arr[i] = v
	end
end

Show Python code

Hide Python code

num = 300
steps = 5000
arr = list(range(num))
for i in range(num):
	v = 0.0
	for j in range(steps):
		num = 3.14159 * j
		num = num / steps
		v = 2 * math.sin(i * v) + math.cos(num) - 2 * math.cos(v)
		if (v < 0):
			v = -v
		arr[i] = v

Conclusion

Although FVM produces fairly well-optimized assembly, Lua is significantly faster due to the fact that its stack-based VM uses simpler types in memory, and because it directly makes use of registers. From what could be determined through benchmarking, Python's optimization is far from comparable, leaving its VM with a lot more bloat to deal with. However, FVM's main advantage over these embeddable languages is its ease of integration with C++ code bases.