/*
Author: Nathaniel Huesler
Year: 2025
Copyright Notice: LICENSE.txt 
*/

/*

This is the main point of entry to the simulator. Note that the program entry point is in win.cpp, which
calls entry(...) at the end of this file. This file parses command line arguments and puts everything together. 

*/

/*

TODO: power analysis
	- switch power: power depends on the output capacitance, which depends on the fanout and interconnect. Ignore interconnect.
	- short-circuit power: power depends on the rise & fall times of the input & outputs. Depends on fan-in and fan-out. 
	- power metrics:
		--- pre-specified ---
		- power voltage (Vdd)
		- unit static energy (uE_st)
		- unit gate capacitance (uC_g)
		- unit short circuit current (uIsc)
		- unit transition time (u_tau)
		
		--- computed ---
				- fan out: f_o
				- switch-count
		- load capacitance: C_l = uC_g*f_o
		- input transition time: tau_i = u_tau;
		- output transition time: tau_o = f_o*u_tau
		- transition ratio: tr = tau_i/tau_o
		- short-circuit energy: E_sc = Vdd*uIsc*tr
		
		- norm switch energy: nE_sw = SUM(switch_count * fan_o)
		- switch energy = nE_sw*uC_g*Vdd^2
		
		
TODO: data collection
	- time line:
		time , norm static power , norm switch power
	- instr stage csv:
		start time , steady state time , end time , stage id , PC
	
NOTE: implemented data collection:
	- input and output samples as WAV.
*/

#define PRAGMA_OPTIMISE on

#define IMPLEMENT_PLATFORM_VIRTUAL_TABLE
#define USE_SPARSE_ASSERTIONS 1

#define STB_TRUETYPE_IMPLEMENTATION
#define STBTT_assert(expr)    if(!(expr)) *(int*)0 = 0

#include "extern_libs/stb_truetype.h"

#include "stdio.h"
#include "math.h"
#include "util.cpp"

#include "platform.cpp"
#include "bitmap.cpp"

#include "debug.h"
#include "math_.h"
#include "input.h"
//#include "bitmap.h"
#include "sound.h"
#include "font.h"
#include "renderer.h"
#include "render_buffer.h"
#include "user_interface.h"

#include "math.cpp"
#include "sound.cpp"
#include "font.cpp"
#include "render_core.cpp"
#include "render_buffer.cpp"
#include "renderer.cpp"
#include "user_interface.cpp"

#pragma optimize("" , PRAGMA_OPTIMISE)

// Fixed-capacity, append-only text buffer that output_to_console__ writes into.
struct Console
{
	char * text;	// backing storage; output_to_console__ keeps it NUL-terminated
	u32 size;		// number of characters currently stored (terminator not counted)
	u32 capacity;	// number of bytes available at `text`
};

// Target of output_to_console_ when `mode` is non-zero. entry() never resets its size.
Console global_retained_console = {};
// Target of output_to_console_ when `mode` is zero. entry() resets its size to 0 on every call.
Console global_transient_console = {};

void output_to_console__(char * message , Console * console)
{
	
	u32 size = console -> size;
	u32 capacity = console -> capacity;
	u32 message_size = str_size(message);
	char * text = console -> text;
	char * curr_char = text + size;
	
	assert(size+message_size < capacity);
	
	curr_char += mem_copy(curr_char , message , message_size);
	*curr_char = 0;
	
	console -> size = curr_char - text;
}

void output_to_console_(char * message , u32 mode)
{
	if(mode) 	output_to_console__(message , &global_retained_console);
	else 		output_to_console__(message , &global_transient_console);
}


// Formats `message` with the trailing arguments via str_format into a 128 byte stack buffer and
// appends the result to the retained (mode != 0) or transient (mode == 0) console.
// NOTE(review): str_format is not given the buffer size here, so the formatted text presumably has to fit in 128 bytes - confirm.
#define output_to_console(message , mode , ...) {char ___buffer___[128] = {}; str_format(___buffer___ , message , __VA_ARGS__); output_to_console_(___buffer___ , mode);}
// Same as output_to_console, but wraps the message as "error: <message>\n". `message` must be a string literal.
// NOTE: the spaces around `message` are required; "error: "message would lex as a user-defined literal suffix in C++11.
#define report_error(message , mode , ...) {char ___buffer___[128] = {}; str_format(___buffer___ , "error: " message "\n" , __VA_ARGS__); output_to_console_(___buffer___ , mode);}


// One named timing section, filled in by start_timer.
struct DebugTimer
{
	u64 start;		// __rdtsc() value taken when the section was started
	u64 end;		// __rdtsc() value taken when the next section was started; 0 while still open
	char * name;	// label passed to start_timer (pointer is stored, not copied)
};

/*
Starts a new named timer in the next free slot of `timers` and closes the previous
slot if it has not been given an end time yet.

timers:    timer array; the caller must provide room for one more entry.
count_ptr: in/out number of used slots; incremented by one.
name:      label stored (by pointer) in the new slot.
*/
void start_timer(DebugTimer * timers , u32 * count_ptr ,  char * name)
{
	u32 slot = *count_ptr;
	
	// NOTE: an end of 0 marks the previous timer as still running.
	if(slot > 0 && timers[slot-1].end == 0)
	{
		timers[slot-1].end = __rdtsc();
	}
	
	DebugTimer * timer = timers + slot;
	timer -> start = __rdtsc();
	timer -> name = name;
	
	*count_ptr = slot + 1;
}

#include "sim.cpp"
#include "compiler.cpp"
#include "mips_gen.cpp"
#include "debug_display.cpp"
#include "preprocessor.cpp"


// State that persists between calls to entry(). It is allocated on the first call and
// handed back to the platform layer through `user_data_ptr`.
struct UserData
{
	Mem mem;							// memory left over after start-up; entry() clears it and uses it as scratch on every call
	Font font;							// font loaded once on the first entry() call
	u32 step_index;						// simulation step reached at the end of the last entry() call
	u32 sw_instr_index;					// software instruction index reached at the end of the last entry() call
	
	u32 target_step_index;				// step limit handed to perform_sim_steps (updated by do_controlls)
	u32 target_sw_instr_index;			// software instruction limit handed to perform_sim_steps (updated by do_controlls)
	u32 playing;						// playback flag owned by do_controlls
	
	u32 tab;							// active UI tab, 0..5 (keys 1..6)
	f32 mem_scroll;						// scroll position of the memory view (tab 1)
	f32 instr_scroll;					// scroll position of the hardware instruction view (tab 1)
	
	Vec2 signal_backtrace_view_offset;	// view offset of the signal backtrace view (tab 2)
	f32 signal_backtrace_view_scale;	// view scale of the signal backtrace view; compile_sim sets it to 1.0
	
	u32 selected_module_index;			// selected module; compile_sim sets it to (u32)-1, i.e. no valid module
	u32 break_on_last_hw_instr;			// when set, entry() calls debug_break() once step_index+1 equals the target step
	u32 break_button_state;				// state handed to do_info_ribbon together with break_on_last_hw_instr
	
	CircuitAddress selected_addr;		// circuit address currently selected in the views
	
	char * input_file;					// contents of the input file read by compile_sim
	u32 input_file_size;				// size reported by get_file_size_by_name for the input file
	
	Token * input_tokens;				// tokens produced by get_tokens
	u32 input_token_count;
	
	Instr * instrs;						// instructions produced by interpret_MIPS / encode_MIPS
	u32 instr_count;
	
	Tag * tags;							// tags produced by interpret_MIPS
	u32 tag_count;
	
	DataPathPair * data_path_pairs;		// pairs produced by get_data_path
	u32 data_path_pair_count;
	
	MipsSpec mips_spec;					// spec filled in by define_mips_spec
	CircuitProgram prog;				// circuit program filled in by MIPS_generate
	CircuitState circuit_states[3];		// rotated as prev / curr / next by set_circuit_state_pointers
	EventBuffer event_buffer;			// event storage allocated in compile_sim
	StatTracker stat_tracker;			// power / stage statistics storage
	
	// NOTE: CSV outputs opened on the first entry() call. Each file is paired with a
	// "last time" value that is passed to the matching update_*_csv call.
	File frame_power_file;
	i32 last_frame_power_time;
	
	File module_power_file;
	i32 last_module_power_time;
	
	File addr_power_file;
	i32 last_addr_power_time;
	
	File instr_stage_file;
	i32 last_instr_stage_time;
	
	
};


// Reads `size` bytes at `offset` of `filename` into `buffer` via read_file.
// A failure is reported on the retained console; nothing is returned to the caller.
void read_input_file(char * filename , void * buffer , u32 size , u32 offset)
{
	if(read_file(filename , buffer , size , offset)) return;
	
	report_error("Could not read file %: %" , 1 , filename , get_prev_error().message);
}


// Fills `mem` with its own byte indices: mem[i] = i truncated to 8 bits (wraps every 256 bytes).
inline
void write_nurmeric_values(u8 * mem , u32 size)
{
	u32 index = 0;
	
	while(index < size)
	{
		mem[index] = (u8)index;
		index++;
	}
}


// Pushes `mem_size` bytes onto `arena`, resets them with reset_circuit_mem and
// records the buffer and its size in `state`.
void allocate_circuit_state_memory(
								   CircuitState * state , u32 mem_size , Mem * arena)
{
	u8 * bytes = mem_push(arena , u8 , mem_size);
	
	state -> size = mem_size;
	state -> mem = bytes;
	
	reset_circuit_mem(bytes , mem_size , 0 , 0 , 0);
}


// Picks the previous / current / next state out of the three entry `circuit_states` ring.
// The three outputs are consecutive ring slots starting at start_index % 3.
inline
void set_circuit_state_pointers(
								CircuitState ** prev_mem , CircuitState ** curr_mem , CircuitState ** next_mem , 
								CircuitState * circuit_states , u32 start_index)
{
	*prev_mem = &circuit_states[(start_index + 0)%3];
	*curr_mem = &circuit_states[(start_index + 1)%3];
	*next_mem = &circuit_states[(start_index + 2)%3];
}


// NOTE: disabled with #if 0, so nothing in this block is compiled.
// NOTE(review): looks like an unfinished sketch (the loop only pushes an address) - confirm before deleting.
#if 0
{
	
	RegisterSpec * regs;
	u32 reg_count;
	
	DataPathPair * pairs;
	u32 pair_count;
	
	u32 * prim_inputs;
	u32 prim_input_count;
	
	u32 * prim_outputs;
	u32 prim_output_count;
	
	for(u32 output_index = 0; output_index < prim_output_count; output_index++)
	{
		
		CircuitAddress output_addr = push_addr(prog , reg_width);
		
		
	}
	
}
#endif

/*
Builds the complete simulation for `input_filename` and stores the result in `data`.

Steps, in order:
	1. push fixed-capacity working arrays and sub-arenas onto `arena`,
	2. define the MIPS spec, read and tokenise the input file, then parse, interpret and encode it,
	3. extract the data path pairs and generate the circuit program (MIPS_generate),
	4. allocate the three circuit state buffers and open the ports,
	5. copy everything into `data` and set a few UI defaults.

input_filename: file to compile.
data:           receives the compiled program and buffers; fields not assigned at the end are left untouched.
arena:          all memory is pushed here; nothing is released.

NOTE(review): a read failure is only reported by read_input_file; compilation then carries on
with whatever is in the buffer (presumably zeroes from mem_push_clear) - confirm this is intended.
*/
void compile_sim(char * input_filename , UserData * data , Mem * arena)
{
	
	u32 input_file_size = 0; 
	void * input_file = 0;
	
	Token * input_tokens = 0;
	u32 input_token_count = 0;
	
	UninterpretedInstr * imm_instrs = 0;
	u32 imm_instr_capacity = 0;
	u32 imm_instr_count = 0;
	
	Instr * instrs = 0;
	u32 instr_capacity = 0;
	u32 instr_count = 0;
	
	Tag * tags = {};
	u32 tag_capacity = 0;
	u32 tag_count = 0;
	
	DataPathPair * data_path_pairs = 0;
	u32 data_path_pair_count = 0;
	
	MipsSpec mips_spec = {};
	CircuitProgram prog = {};
	EventBuffer event_buffer = {};
	StatTracker stat_tracker = {};
	
	
	CircuitState circuit_states[3] = {};
	u32 circuit_mem_size = 0;
	
	// NOTE: Memory Allocation. All capacities below are hard-coded.
	
	// NOTE: one byte more than the file size is pushed for the input text.
	input_file_size = get_file_size_by_name(input_filename);
	input_file = mem_push_clear(arena , void , input_file_size+1);
	
	imm_instr_capacity = 64;
	imm_instrs = mem_push_array(arena , UninterpretedInstr , imm_instr_capacity);
	
	instr_capacity = 64;
	instrs = mem_push_array(arena , Instr , instr_capacity);
	
	tag_capacity = 64;
	tags = mem_push_array(arena , Tag , tag_capacity);
	
	// NOTE: each part of the MIPS spec gets its own 1 MB sub-arena.
	mips_spec.lines = mem_push_sub(arena , MB(1));
	mips_spec.regs = mem_push_sub(arena , MB(1));
	mips_spec.stage_lines = mem_push_sub(arena , MB(1));
	mips_spec.ports = mem_push_sub(arena , MB(1));
	mips_spec.fields = mem_push_sub(arena , MB(1));
	mips_spec.instr_set = mem_push_sub(arena , MB(1));
	mips_spec.paths = mem_push_sub(arena , MB(1));
	mips_spec.instr_spec_ctr_line_arena = mem_push_sub(arena , MB(1));
	
	// NOTE: the circuit byte address counter starts at 1, not 0. Its final value is used
	// as the circuit memory size further down.
	prog.curr_byte_addr = 1;
	
	prog.hw_instr_capacity = 1024;
	prog.hw_instrs = mem_push_array(arena , HardwareInstr , prog.hw_instr_capacity);
	
	prog.port_capacity = 64;
	prog.ports = mem_push_array(arena , CircuitPort , prog.port_capacity);
	
	prog.input_vec_capacity = 1024;
	prog.input_vecs = mem_push_array(arena , CircuitInputVector , prog.input_vec_capacity);
	
	prog.circuit_mem_info_capacity = 1024;
	prog.circuit_mem_info = mem_push_array(arena , CircuitMemoryInfo , prog.circuit_mem_info_capacity);
	
	prog.module_capacity = 64;
	prog.modules = mem_push_array(arena , CircuitModule , prog.module_capacity);
	
	prog.module_link_capacity = 64;
	prog.module_links = mem_push_array(arena , CircuitModuleLink , prog.module_link_capacity);
	
	prog.module_addr_list_arena = mem_push_sub(arena , MB(4));
	
	prog.module_stack_capacity = 16;
	prog.module_stack = mem_push_array(arena , u32 , prog.module_stack_capacity);
	
	prog.test_vec_capacity = 64;
	prog.test_vecs = mem_push_array(arena , TestVector , prog.test_vec_capacity);
	
	prog.test_vec_expr_capacity = 128;
	prog.test_vec_exprs = mem_push_array(arena , TestVectorExpr , prog.test_vec_expr_capacity);
	
	prog.test_vec_buffer_mem_capacity = 1024;
	prog.test_vec_buffer_mem = mem_push_array(arena , SignalLog , prog.test_vec_buffer_mem_capacity);
	
	prog.trace_capacity = 64;
	prog.traces = mem_push_array(arena , SignalTrace , prog.trace_capacity);
	
	prog.trace_result_capacity = 128;
	prog.trace_results = mem_push_array(arena , SignalTraceResult , prog.trace_result_capacity);
	
	prog.path_mem_capacity = KB(1);
	prog.path_mem = mem_push_array(arena , u8 , prog.path_mem_capacity);
	
	event_buffer.event_capacity = 1024*64;
	event_buffer.events = mem_push_array(arena , CircuitEvent , event_buffer.event_capacity);
	
	event_buffer.power_capacity = 1024*64;
	event_buffer.power_events = mem_push_array(arena , CircuitPowerEvent , event_buffer.power_capacity);
	
	event_buffer.fanout_capacity = 1024*64;
	event_buffer.fanout_events = mem_push_array(arena , CircuitFanoutEvent , event_buffer.fanout_capacity);
	
	event_buffer.time_mark_capacity = 1024*2;
	event_buffer.time_marks = mem_push_array(arena , CircuitEventTimeMark , event_buffer.time_mark_capacity);
	
	stat_tracker.frame_capacity = 1024;
	stat_tracker.frames = mem_push_array(arena , StatFrame , stat_tracker.frame_capacity);
	
	// NOTE: pre-specified power metrics (see the power analysis TODO at the top of the file).
	stat_tracker.unit_static_energy = 0;
	stat_tracker.unit_gate_capacitance = 1;
	
	
	// NOTE: Compilation. Pipeline: tokens -> uninterpreted instrs -> instrs + tags -> encoded instrs.
	define_mips_spec(&mips_spec , arena);
	
	read_input_file(input_filename , input_file , input_file_size , 0);
	get_tokens(&input_tokens , &input_token_count , arena , (char*)input_file , (char*)input_file + input_file_size);
	parse_MIPS(input_tokens , input_token_count , imm_instrs , &imm_instr_count , imm_instr_capacity , arena);
	interpret_MIPS(imm_instrs , imm_instr_count , instrs , &instr_count , instr_capacity , tags , &tag_count , tag_capacity , mips_spec.regs , mips_spec.instr_set);
	encode_MIPS(instrs , instr_count , mips_spec.instr_set , mips_spec.fields);
	
	get_data_path(instrs , instr_count , &data_path_pairs , &data_path_pair_count , arena);
	//match_resources(data_path_pairs , data_path_pair_count , arena);
	
	
	// NOTE: disabled with #if 0; it references names that are not declared in this function.
#if 0
	
	CircuitAddress * primary_inputs = 0;
	CircuitAddress * primary_outputs = 0;
	u32 primary_input_count = 0;
	u32 primary_output_count = 0;
	
	u32 circuit_size_in_bits = max = generate_hardware_instructions(
																	&hw_instrs , &hw_instr_count , 
																	&primary_inputs , &primary_input_count , 
																	&primary_outputs , &primary_output_count , 
																	instrs , instr_count , 
																	data_path_pairs , pair_count , arena);
	
	
	circuit_mem_size = get_byte_width(circuit_size_in_bits);
	circuit_mem_A = (u8*)push_memory(arena , circuit_mem_size);
	circuit_mem_B = (u8*)push_memory(arena , circuit_mem_size);
	
	//mem_set(circuit_mem_A , 0 , circuit_mem_size);
	//mem_set(circuit_mem_B , 0 , circuit_mem_size);
	
	write_nurmeric_values(circuit_mem_A , circuit_mem_size);
	write_nurmeric_values(circuit_mem_B , circuit_mem_size);
	
#endif
	
	MIPS_generate(
				  &prog , &mips_spec , 
				  instrs , instr_count , arena
				  );
	
	// NOTE: sim allocation. The byte address counter after generation is the size of
	// one circuit state; three equally sized states are allocated.
	circuit_mem_size = prog.curr_byte_addr;
	allocate_circuit_state_memory(
								  circuit_states + 0 , circuit_mem_size , arena);
	
	allocate_circuit_state_memory(
								  circuit_states + 1 , circuit_mem_size , arena);
	
	allocate_circuit_state_memory(
								  circuit_states + 2 , circuit_mem_size , arena);
	
	//sort_hardware_instructions(hw_instrs , hw_instr_count , arena);
	
	open_ports(prog.ports , prog.port_count , arena);
	
	// NOTE: hand the results over to the persistent user data.
	data -> target_step_index = 0;
	data -> target_sw_instr_index = 0;
	data -> selected_module_index = (u32)-1;	// larger than any module count, so no module is selected
	data -> signal_backtrace_view_scale = 1.0f;
	
	data -> input_file = (char*)input_file;
	data -> input_file_size = input_file_size;
	
	data -> input_tokens = input_tokens;
	data -> input_token_count = input_token_count;
	
	data -> instrs = instrs;
	data -> instr_count = instr_count;
	
	data -> tags = tags;
	data -> tag_count = tag_count;
	
	data -> data_path_pairs = data_path_pairs;
	data -> data_path_pair_count = data_path_pair_count;
	
	data -> mips_spec = mips_spec;
	data -> prog = prog;
	
	data -> circuit_states[0] = circuit_states[0];
	data -> circuit_states[1] = circuit_states[1];
	data -> circuit_states[2] = circuit_states[2];
	
	data -> event_buffer = event_buffer;
	data -> stat_tracker = stat_tracker;
}

// Values returned by perform_sim_steps once it stops stepping.
struct PerformSimStepsResult 
{
	u32 step_index;			// step index after the last performed step
	u32 sw_instr_index;		// software instruction index after the last performed step
	i32 time;				// get_circuit_time(step_index , hw_instr_count) for the returned step index
};


// Converts a step index into circuit time: the number of whole passes over the
// hardware instruction list. `hw_instr_count` must not be 0.
i32 get_circuit_time(u32 step_index , u32 hw_instr_count)
{
	u32 completed_passes = step_index / hw_instr_count;
	return((i32)completed_passes);
}

// Returns the result of comparing the first `circuit_mem_size` bytes of the current
// and next state memories with mem_eql.
u32 is_circuit_steady_state(CircuitState * curr_state , CircuitState * next_state , u32 circuit_mem_size)
{
	return(mem_eql(curr_state -> mem , next_state -> mem , circuit_mem_size));
}

#pragma optimize("" , on)



/*
Performs hardware instructions until either `target_step_index` or
`target_sw_instr_index` is reached.

Each step executes the hardware instruction at step_index % hw_instr_count. When a
step is the last one of a pass over the instruction list, end_of_sw_instr decides
whether the software instruction index advances; if `mode` is non-zero, stepping
also stops there (before the step index is advanced) once the number of events
pushed so far has reached event_capacity * event_buffer_utilisation.

Prints the average cycle counts per step to the transient console when at least
one step was completed.

Returns the step index, software instruction index and circuit time reached.
*/
PerformSimStepsResult perform_sim_steps(
										u32 mode , 
										u32 step_index , u32 target_step_index , 
										u32 sw_instr_index , u32 target_sw_instr_index , 
										f32 event_buffer_utilisation , 
										HardwareInstr * hw_instrs , u32 hw_instr_count , CircuitState * circuit_states , 
										CircuitPort * ports , u32 port_count , 
										CircuitInputVector * input_vecs , u32 input_vec_count , 
										CircuitMemoryInfo * mem_info , u32 mem_info_count , 
										EventBuffer * event_buffer , InstrTracker * instr_tracker)
{
	
	CircuitState * state_prev = {};
	CircuitState * state_curr = {};
	CircuitState * state_next = {};
	
	// NOTE: event bookkeeping. The end index wraps around, so pushed events are counted modulo the capacity.
	u32 event_budget = event_buffer -> event_capacity*event_buffer_utilisation;
	u32 events_seen = 0;
	u32 ring_cursor = event_buffer -> event_end_index;
	u32 ring_capacity = event_buffer -> event_capacity;
	
	// NOTE: cycle counters for the console report.
	u32 steps_done = 0;
	u64 hw_cycles = 0;
	u64 loop_begin = __rdtsc();
	
	for(;;)
	{
		if(step_index >= target_step_index) break;
		if(sw_instr_index >= target_sw_instr_index) break;
		
		u32 slot = step_index % hw_instr_count;
		u32 next_slot = (step_index+1) % hw_instr_count;
		
		i32 time = get_circuit_time(step_index , hw_instr_count);
		push_time_marks(time , event_buffer);
		
		set_circuit_state_pointers(
								   &state_prev , &state_curr , &state_next , circuit_states , time
								   );
		
		u64 hw_begin = __rdtsc();
		perform_hw_instruction(
							   slot , hw_instrs , hw_instr_count , 
							   state_prev , state_curr , state_next ,
							   ports , port_count , input_vecs , input_vec_count , 
							   mem_info , mem_info_count , 
							   event_buffer , time
							   );
		hw_cycles += __rdtsc() - hw_begin;
		
		// NOTE: the step just performed was the last one of a pass over the instruction list.
		if(next_slot == 0)
		{
			if(end_of_sw_instr(instr_tracker , state_prev , state_curr))
			{
				sw_instr_index++;
			}
			
			// NOTE: leaves before step_index and the counters below are updated.
			if(mode && events_seen >= event_budget)
			{
				break;
			}
		}
		
		step_index++;
		
		u32 ring_end = event_buffer -> event_end_index;
		
		if(ring_end < ring_cursor)
		{
			events_seen += ring_end + ring_capacity - ring_cursor;
		}
		else
		{
			events_seen += ring_end - ring_cursor;
		}
		
		ring_cursor = ring_end;
		steps_done++;
	}
	
	u64 loop_end = __rdtsc();
	
	if(steps_done)
	{
		u32 avg_p = hw_cycles / steps_done;
		u32 instr_per_step = (loop_end-loop_begin) / steps_done;
		output_to_console("cpu per step % , p avg = %\n" , 0 , instr_per_step , avg_p);
	}
	
	PerformSimStepsResult result = {};
	
	result.time = get_circuit_time(step_index , hw_instr_count);
	result.sw_instr_index = sw_instr_index;
	result.step_index = step_index;
	
	return(result);
}


#pragma optimize("" , PRAGMA_OPTIMISE)


// Hands each CSV file in `data`, together with its "last time" value, to the matching
// update_*_csv routine along with the statistics it is fed from.
inline
void update_csv_files(UserData * data , StatTracker * stats , InstrTracker * instr_tracker , EventBuffer * event_buffer)
{
	
	// NOTE: the arenas are copied by value before any of the update calls run.
	Mem module_arena_copy = stats -> module_power_arena;
	Mem addr_arena_copy = stats -> addr_power_arena;
	
	StatFrame * stat_frames = stats -> frames;
	u32 stat_frame_count = stats -> frame_count;
	
	update_frame_power_csv(
						   &data -> frame_power_file , &data -> last_frame_power_time , 
						   stat_frames , stat_frame_count);
	
	update_module_power_csv(
							&data -> module_power_file , &data -> last_module_power_time , 
							module_arena_copy);
	
	update_addr_power_csv(
						  &data -> addr_power_file , &data -> last_addr_power_time , 
						  addr_arena_copy);
	
	update_stage_csv(
					 &data -> instr_stage_file , &data -> last_instr_stage_time , 
					 instr_tracker , event_buffer);
}	



// Time rate handed to WAV_to_port / port_to_WAV. 44100*22*14 matches the target step frequency
// computed in entry() (sample rate * avg steps per instr * instrs per sample).
// NOTE(review): the meaning of the extra 1.25 factor is not visible here - confirm.
// NOTE: parenthesised so the value stays intact inside larger expressions, e.g. x / SIM_TIME_RATE.
#define SIM_TIME_RATE (44100*22*14*1.25)




/*
Exported per-call entry point of the simulator (the file header notes it is called from win.cpp).

First call (*user_data_ptr is null): allocates memory, sets up both consoles, loads the font,
feeds "white-noise.wav" to "port0", compiles "FIR2.asm" and starts the CSV output files.

Every call: handles the controls, advances the simulation, runs the stage / power analysis,
appends to the CSV files, builds the UI for the active tab, renders it into `screen_buffer`
and writes the per-call state back into the user data.

When *running ends up 0: saves the stats, flushes and closes the ports, writes "output.wav"
and ends the CSV files.

argc , argv:   not used in this function.
running:       in/out flag; cleared when ESCAPE is pressed or step_index reaches 1000000000.
screen_buffer: target pixels; filled with 0xFFFFFFFF before rendering.
input_state:   keyboard / pointer state handed to the controls and views.
delta_time:    handed to do_controlls.
procs:         platform procedures installed with set_platform_virtual_table.
user_data_ptr: receives the UserData pointer on the first call; read back on later calls.

Returns 0 in all cases.
*/
extern "C" __declspec( dllexport )
int entry(
		  int argc , char ** argv , u32 * running , 
		  PlatformScreenBuffer screen_buffer , 
		  InputState * input_state , f32 delta_time , 
		  PlatformProcedures * procs , void ** user_data_ptr)
{
	
	set_platform_virtual_table(procs);
	UserData * data = (UserData*)(*user_data_ptr);
	
	// NOTE: one-time initialisation on the first call.
	if(!data)
	{
		Mem mem = allocate_memory(MB(64) + sizeof(UserData) , TB(1) , 0);
		Mem arena = {mem.base , mem.capacity};
		
		global_retained_console.size = 0;
		global_retained_console.capacity = KB(8);
		global_retained_console.text = mem_push_array(&arena , char , global_retained_console.capacity);	
		
		global_transient_console.size = 0;
		global_transient_console.capacity = KB(8);
		global_transient_console.text = mem_push_array(&arena , char , global_transient_console.capacity);	
		
		char curr_dir[128] = {}; get_dir(curr_dir , array_size(curr_dir));
		output_to_console("dir = %\n" , 1 , curr_dir);
		
		data = mem_push_struct(&arena , UserData);
		Mem temp_arena = mem_push_sub(&arena , MB(1));
		
		Font font =  load_font("C:/Windows/Fonts/courbd.ttf" , 16 , 32 , &arena , &temp_arena , 0 , 128);
		
		data -> font = font;
		WAV_to_port("white-noise.wav" , "port0" , SIM_TIME_RATE , &arena);
		compile_sim("FIR2.asm" , data , &arena);
		// NOTE: whatever is left of the arena becomes the per-call scratch memory.
		data -> mem = {mem_end(arena , void) , mem_rem(arena , u8)};
		
		*user_data_ptr = data;
		
		// NOTE: informational only; reports how many CPU cycles a step may take at a
		// hard-coded 3.5 GHz to reach the target step frequency.
		u32 target_f = 44100;
		u32 instr_per_sample = 14;
		u32 avg_step_per_instr = 22;
		
		u32 target_instr_f = target_f*instr_per_sample;
		u32 target_step_f = target_instr_f*avg_step_per_instr;
		
		u32 cpu_f = 3500000000;
		u32 target_cpu_per_step = cpu_f / target_step_f;
		
		output_to_console("target cpu per step %\n" , 1 , target_cpu_per_step);
		
		data -> frame_power_file = start_csv_file("frame_power.csv" , "time , norm static energy , norm switch energy , norm short circuit energy");
		data -> module_power_file = start_csv_file("module_power.csv" , "time , module_id , norm static energy , norm switch energy , total switches , total fanout");
		data -> addr_power_file = start_csv_file("addr_power.csv" , "time , addr min , addr max , total switches , total fanout");
		data -> instr_stage_file = start_csv_file("instr_stages.csv" , "start time , steady state time , end time , stage id , PC");
	}
	
	// NOTE: the transient console only shows text written during the current call.
	global_transient_console.size = 0;
	
	// NOTE: the scratch arena starts empty and zeroed on every call.
	Mem mem = data -> mem;
	Mem arena = {mem.base , mem.capacity};
	mem_set_wide(arena.base , 0 , arena.capacity);
	
	// NOTE: load the persistent state into locals; it is written back at the end of the call.
	u32 target_step_index = data -> target_step_index;
	u32 target_sw_instr_index = data -> target_sw_instr_index;
	u32 playing = data -> playing;
	
	u32 step_index = data -> step_index;
	u32 sw_instr_index = data -> sw_instr_index;
	
	u32 selected_module_index = data -> selected_module_index;
	u32 tab = data -> tab;
	f32 mem_scroll = data -> mem_scroll;
	f32 instr_scroll = data -> instr_scroll;
	Vec2 signal_backtrace_view_offset = data -> signal_backtrace_view_offset;
	f32 signal_backtrace_view_scale = data -> signal_backtrace_view_scale;
	u32 break_on_last_hw_instr = data -> break_on_last_hw_instr;
	u32 break_button_state = data -> break_button_state;
	CircuitAddress selected_addr = data -> selected_addr;
	
	Instr * instrs = data -> instrs;
	u32 instr_count = data -> instr_count;
	MipsSpec * mips_spec = &data -> mips_spec;
	CircuitProgram * prog = &data -> prog;
	CircuitState * circuit_states = data -> circuit_states;
	EventBuffer * event_buffer = &data -> event_buffer;
	StatTracker * stat_tracker = &data -> stat_tracker;
	
	HardwareInstr * hw_instrs = prog -> hw_instrs;
	u32 hw_instr_count = prog -> hw_instr_count;
	
	CircuitPort * ports = prog -> ports;
	u32 port_count = prog -> port_count;
	
	CircuitInputVector * input_vecs = prog -> input_vecs;
	u32 input_vec_count = prog -> input_vec_count;
	
	CircuitMemoryInfo * circuit_mem_info = prog -> circuit_mem_info;
	u32 circuit_mem_info_count = prog -> circuit_mem_info_count;
	
	TestVector * test_vecs = prog -> test_vecs;
	u32 test_vec_count = prog -> test_vec_count;
	
	//SignalTrace * traces = prog -> traces;
	//u32 trace_count = prog -> trace_count;
	
	//SignalTraceResult * trace_results = prog -> trace_results;
	//u32 trace_result_count = prog -> trace_result_count;
	
	CircuitModule * modules = prog -> modules;
	u32 module_count = prog -> module_count;
	
	CircuitModuleLink * module_links = prog -> module_links;
	u32 module_link_count = prog -> module_link_count;
	
	// NOTE: hardware instruction range shown in the views; narrowed below when a module is selected.
	u32 min_hw_instr_index = 0;
	u32 max_hw_instr_index = prog -> hw_instr_count;
	
	// NOTE: per-call scratch allocations. test_result_count stays 0 because the test vector
	// pass further down is disabled with #if 0.
	u32 test_result_count = 0;
	u32 test_result_capacity = 1024;
	TestVectorResult * test_results = mem_push_array(&arena , TestVectorResult , test_result_capacity);
	Mem test_result_arena = mem_push_sub(&arena , 1024*sizeof(EvalExprResult));
	Mem test_scratch_arena = mem_push_sub(&arena , KB(8));
	
	stat_tracker -> addr_power_arena = mem_push_sub(&arena , MB(1));
	stat_tracker -> module_power_arena = mem_push_sub(&arena , MB(1));
	
	CircuitState * prev_state = {};
	CircuitState * curr_state = {};
	CircuitState * next_state = {};
	u32 circuit_mem_size = circuit_states[0].size;
	
	InstrTracker instr_tracker = {};
	u32 step_rate = 9999999;
	u32 sw_instr_rate = 1000;
	
	// NOTE: start_timer does no bounds checking; this call uses six of the 16 slots.
	DebugTimer timers[16] = {};
	u32 timer_count = 0;
	
	start_timer(timers , &timer_count ,  "PRE-SIM");
	
	do_controlls(
				 &target_step_index , &target_sw_instr_index , &playing , 
				 step_index , sw_instr_index , hw_instr_count , 
				 step_rate , sw_instr_rate , 
				 input_state , delta_time
				 );
	
	// NOTE: one-shot debugger break just before the last step towards the target is performed.
	if(step_index+1 == target_step_index && break_on_last_hw_instr)
	{
		debug_break();
		break_on_last_hw_instr = 0;
	}
	
	set_instr_tracker(&instr_tracker , &arena , mips_spec -> stage_lines , mips_spec -> lines);
	
	start_timer(timers , &timer_count ,  "SIM");
	
	// NOTE: mode 0, so the 0.75 event buffer utilisation limit does not stop the stepping.
	PerformSimStepsResult sim_result = perform_sim_steps(
														 0 , step_index , target_step_index , 
														 sw_instr_index , target_sw_instr_index , 
														 0.75 , 
														 hw_instrs , hw_instr_count , circuit_states , 
														 ports , port_count , 
														 input_vecs , input_vec_count , 
														 circuit_mem_info , circuit_mem_info_count , 
														 event_buffer , &instr_tracker
														 );
	
	start_timer(timers , &timer_count ,  "POST-SIM");
	
	i32 time = sim_result.time;
	u32 next_step_index = sim_result.step_index;
	u32 next_sw_instr_index = sim_result.sw_instr_index;
	
	// NOTE: select the prev / curr / next states for the time that was reached.
	set_circuit_state_pointers(
							   &prev_state , &curr_state , &next_state , circuit_states , time
							   );
	
	stat_tracker -> frame_count = 0;
	
	//check_for_contention(event_buffer);
	get_instr_stages(&instr_tracker , event_buffer);
	perform_power_analysis(stat_tracker , event_buffer , modules , module_count);
	perform_stage_analysis(stat_tracker , &instr_tracker);
	
	// NOTE: disabled with #if 0; it references traces / trace_results, which are commented out above.
#if 0	
	test_instr_stages(
					  &instr_tracker , 
					  mips_spec -> lines , mips_spec -> regs , 
					  mips_spec -> instr_set , mips_spec -> fields , mips_spec -> paths , 
					  mips_spec -> reg_width , 
					  instrs , instr_count , event_buffer , curr_state , 
					  time , &arena
					  );
	
	perform_test_vectors(
						 test_vecs , test_vec_count , time , 
						 event_buffer , curr_state , 
						 traces , trace_count , trace_results , trace_result_count , 
						 test_results , &test_result_count , 
						 test_result_capacity , &test_result_arena , &test_scratch_arena
						 );
	
#endif
	start_timer(timers , &timer_count ,  "STAT-OUTPUT");
	
	
	update_csv_files(data , stat_tracker , &instr_tracker , event_buffer);
	
	//time = target_step_index / hw_instr_count;
	start_timer(timers , &timer_count ,  "RENDER");
	
	u32 * viewed_instrs = 0;
	u32 viewed_instr_capacity = 0;
	u32 viewed_instr_count = 0;
	
	// NOTE: with a valid module selected, the views are limited to that module's instruction range.
	if(selected_module_index < module_count)
	{
		min_hw_instr_index = modules[selected_module_index].min_instr_index;
		max_hw_instr_index = modules[selected_module_index].max_instr_index;
		
		viewed_instrs = mem_end(arena , u32);
		viewed_instr_capacity = mem_rem(arena , u32);
		
		// NOTE: viewed_instr_count stays 0 while the call below is commented out.
		//get_viewed_instructions(viewed_instrs , &viewed_instr_count , viewed_instr_capacity);
		mem_push_array(&arena , u32 , viewed_instr_count);
	}
	
	// NOTE: the hardware instruction that the next step would perform.
	HardwareInstr * curr_instr = hw_instrs + (next_step_index % hw_instr_count);
	RenderBuffer render_buffer = {};
	Font font = data -> font;
	set_render_buffer(&render_buffer , 
					  screen_buffer.width , screen_buffer.height , 
					  &arena , 
					  MB(8) , MB(8) , MB(8) , MB(8));
	
	// NOTE: clear the screen to 0xFFFFFFFF.
	mem_set_wide(screen_buffer.data , 0xFFFFFFFF , screen_buffer.pixel_count*screen_buffer.bytes_per_pixel);
	
	// NOTE: keys 1..6 select tabs 0..5.
	if(key_pressed(input_state -> keys[KEY_1])) tab = 0;
	if(key_pressed(input_state -> keys[KEY_2])) tab = 1;
	if(key_pressed(input_state -> keys[KEY_3])) tab = 2;
	if(key_pressed(input_state -> keys[KEY_4])) tab = 3;
	if(key_pressed(input_state -> keys[KEY_5])) tab = 4;
	if(key_pressed(input_state -> keys[KEY_6])) tab = 5;
	
	// NOTE: views write a new selection to next_selected_addr; it is applied after all views have run.
	CircuitAddress next_selected_addr = selected_addr;
	CircuitValue selected_addr_value = read_circuit_value(curr_state , selected_addr);
	
	// NOTE: screen layout: an info ribbon at the top, the active tab below it.
	Rect region_sizer = dilate({0,0,render_buffer.screen_size} , -10.0f , -10.0f);
	Rect ribbon_region = push_down(&region_sizer , TEXT_HEIGHT*2 , 2);
	Rect display_region = push_down(&region_sizer , region_sizer.size.y , 0);
	
	stats_to_console(stat_tracker);
	
#if 0
	addr_power_list_to_console(
							   start_of(stat_tracker -> addr_power_arena , CircuitAddressPower) , get_count(stat_tracker -> addr_power_arena , CircuitAddressPower) , 
							   circuit_mem_info , circuit_mem_info_count);
#endif
	
	timers_to_console(timers , timer_count);
	
	do_info_ribbon(
				   &render_buffer , &font , input_state , ribbon_region , 
				   step_index , next_step_index , sw_instr_index , next_sw_instr_index , 
				   hw_instr_count , selected_addr ,
				   &break_button_state , &break_on_last_hw_instr , 
				   signal_backtrace_view_offset , signal_backtrace_view_scale
				   );
	
	// NOTE: tab 0 - retained and transient console side by side.
	if(tab == 0)
	{
		
		Rect tab_sizer = display_region;
		Rect retained_console_region = horizontal_split(tab_sizer , 2 , 0);
		Rect transient_console_region = horizontal_split(tab_sizer , 2 , 1);
		
		do_console_view(
						&render_buffer , &font , retained_console_region , 
						global_retained_console.text , global_retained_console.size
						);
		
		do_console_view(
						&render_buffer , &font , transient_console_region , 
						global_transient_console.text , global_transient_console.size
						);
		
	}
	
	
	// NOTE: tab 1 - circuit memory, hardware instructions and module (component) view.
	else if(tab == 1)
	{
		
		
		Rect tab_sizer = display_region;
		Rect memory_region = push_right(&tab_sizer , 300);
		Rect instr_region = push_right(&tab_sizer , tab_sizer.size.x-200);
		Rect module_region = push_left(&tab_sizer , tab_sizer.size.x);
		
		mem_scroll = scroll_in_region(mem_scroll , memory_region , input_state);
		mem_scroll = clip(mem_scroll , 0 , circuit_mem_size);
		
		instr_scroll = scroll_in_region(instr_scroll , instr_region , input_state);
		instr_scroll = clip(instr_scroll , 0 , hw_instr_count);
		
		do_memory_view(
					   &render_buffer , &font , input_state , 
					   memory_region , mem_scroll , 
					   prev_state , curr_state , next_state , circuit_mem_size , 
					   circuit_mem_info , circuit_mem_info_count , 
					   curr_instr , hw_instrs , hw_instr_count , 
					   selected_addr , &next_selected_addr , min_hw_instr_index , max_hw_instr_index , 
					   &arena
					   );
		
		do_instr_view(
					  &render_buffer , &font , instr_region , instr_scroll , 
					  hw_instrs , hw_instr_count , curr_instr , 
					  viewed_instrs, viewed_instr_count , 
					  circuit_mem_info , circuit_mem_info_count , 
					  selected_addr , min_hw_instr_index , max_hw_instr_index , 
					  &arena
					  );
		
		do_component_view(
						  &render_buffer , &font , input_state , &arena , module_region ,
						  modules , module_count , 
						  module_links , module_link_count , &selected_module_index
						  );
	}
	
	// NOTE: tab 2 - signal backtrace and driver list for the selected address.
	else if(tab == 2)
	{
		
		Rect tab_sizer = display_region;
		Rect signal_region = push_right(&tab_sizer , tab_sizer.size.x-00);
		Rect driver_region = push_right(&tab_sizer , tab_sizer.size.x);
		
		do_signal_backtrace_view(
								 &render_buffer , &font , input_state , signal_region , 
								 event_buffer , selected_addr , selected_addr_value , &next_selected_addr , time , 
								 &signal_backtrace_view_offset , &signal_backtrace_view_scale , 
								 circuit_mem_info , circuit_mem_info_count , &arena
								 );
		
		do_driver_list_view(
							&render_buffer , &font , input_state , driver_region , 
							selected_addr , &next_selected_addr , time , 
							event_buffer , circuit_mem_info , circuit_mem_info_count
							);
		
	}
	
	// NOTE: tab 3 - assembly, completed instructions and MIPS control lines.
	else if(tab == 3)
	{
		
		Rect tab_sizer = display_region;
		Rect assembly_sizer = horizontal_split(tab_sizer , 3 , 0);
		Rect completed_instr_region = push_down(&assembly_sizer , TEXT_HEIGHT*8);
		Rect assembly_region = push_down(&assembly_sizer , assembly_sizer.size.y);
		Rect lines_region = horizontal_split(tab_sizer , 3 , 1);
		
		Rect mips_control_line_region = 
			vertical_split(
						   lines_region , 
						   1 , 0
						   );
#if 0
		Rect mips_port_line_region = 
			vertical_split(
						   lines_region , 
						   3 , 2
						   );
#endif
		
		do_assembly_view(
						 &render_buffer , &font , assembly_region , 
						 instrs , instr_count , mips_spec , curr_state , &arena
						 );
		
		do_completed_instr_view(
								&render_buffer , &font , completed_instr_region , &instr_tracker , instrs , mips_spec
								);
		
		do_line_display(
						&render_buffer , &font , mips_control_line_region , 
						circuit_mem_info , circuit_mem_info_count , 
						mips_spec -> lines , curr_state , &arena
						);
	}
	
	// NOTE: tab 4 - test vector overview and test results.
	// NOTE(review): show_test_results is given test_overview_region and do_test_overview is given
	// test_region - confirm the regions are not swapped.
	else if(tab == 4)
	{
		
		Rect tab_sizer = display_region;
		Rect test_region = push_right(&tab_sizer , tab_sizer.size.x*0.5f);
		Rect test_overview_region = push_right(&tab_sizer , tab_sizer.size.x);
		
		show_test_results(
						  &render_buffer , &font , test_overview_region , 
						  test_results , test_result_count , 
						  circuit_mem_info , circuit_mem_info_count , &arena
						  );
		
		do_test_overview(
						 &render_buffer , &font , test_region , 0 , 
						 test_vecs , test_vec_count ,
						 circuit_mem_info , circuit_mem_info_count , 
						 &arena
						 );
	}
	
	// NOTE: tab 5 - assembly listing next to the statistics graph.
	else if(tab == 5)
	{
		
		Rect tab_sizer = display_region;
		Rect stats_text_region = push_right(&tab_sizer , 400);
		Rect stats_graph_region = push_right(&tab_sizer , tab_sizer.size.x);
		
		do_assembly_view(
						 &render_buffer , &font , stats_text_region , 
						 instrs , instr_count , mips_spec , curr_state , &arena
						 );
		
		
		do_stats_graph(&render_buffer , &font , stats_graph_region , stat_tracker);
	}
	
	// NOTE: any other tab value is a programming error.
	else
	{
		assert_zero;
	}
	
	selected_addr = next_selected_addr;
	
	// NOTE: turn the recorded render commands into pixels in the platform screen buffer.
	Bitmap bitmap_buffer = platform_screen_buffer_to_bitmap(screen_buffer , RGBA_PIXEL_FORMAT);
	Camera camera = {};
	
	transform_render_commands(&render_buffer , &camera);
	sort_render_commands(&render_buffer , &arena);
	
	render_screen_space(
						&render_buffer , bitmap_buffer , 
						&arena
						);
	
	start_timer(timers , &timer_count ,  "END");
	
	
	// NOTE: stop on ESCAPE or once the step index at the start of this call has reached 1000000000.
	*running &= !key_pressed(input_state -> keys[KEY_ESCAPE]);
	*running &= step_index < 1000000000;
	
	// NOTE: shutdown - write the remaining outputs.
	if(*running == 0)
	{
		
		
		save_stats(stat_tracker , ports , port_count , time , "overview.txt" , &arena);
		
		flush_ports(ports , port_count);
		close_ports(ports , port_count);
		port_to_WAV("output.wav" , "port1" , 32 , SIM_TIME_RATE , &arena);
		
		end_csv_file(&data -> frame_power_file);
		end_csv_file(&data -> module_power_file);
		end_csv_file(&data -> addr_power_file);
		end_csv_file(&data -> instr_stage_file);
		
	}
	
	// NOTE: store the state for the next call.
	data -> target_step_index = target_step_index;
	data -> target_sw_instr_index = target_sw_instr_index;
	
	data -> step_index = next_step_index;
	data -> sw_instr_index = next_sw_instr_index;
	data -> playing = playing;
	
	data -> tab = tab;
	data -> selected_module_index = selected_module_index;
	data -> mem_scroll = mem_scroll;
	data -> instr_scroll = instr_scroll;
	data -> signal_backtrace_view_offset = signal_backtrace_view_offset;
	data -> signal_backtrace_view_scale = signal_backtrace_view_scale;
	data -> selected_addr = selected_addr;
	data -> break_on_last_hw_instr = break_on_last_hw_instr;
	data -> break_button_state = break_button_state;
	
	return(0);
}

//#pragma optimize("" , off)


