/*
Author: Nathaniel Huesler
Year: 2025
Copyright Notice: LICENSE.txt
*/

/*

This file defines the main execution of the simulator. This includes defining sim-memory;
execution of sim-instructions; logging to give information about fanout, energy consumption and
a history of circuit values; port management; and critical pre/post-processing functions such as instruction sorting and signal backtrace.

*/

// A value read from or written to circuit memory.
struct CircuitValue
{
	u32 v;     // the value bits (at most 32)
	u32 width; // number of bits of 'v' that are meaningful
};

// A range of bits in circuit memory. The width is 'max - min' (see width_of()),
// so 'min' is the first bit and 'max' is one past the last bit.
struct CircuitAddress
{
	// NOTE: in bits
	u32 min;
	u32 max;
};

// Function codes stored in HardwareInstr::function. COMP_COUNT is the number of
// codes and sizes global_hw_instr_bank, whose rows follow this order.
enum
{
	COMP_NO_OP = 0 , 
	UPDATE_IO , 
	COMP_CIRCUIT_STEADY_STATE , 
	COMP_TRANS_GATE , 
	COMP_NOT , 
	COMP_AND2 , 
	COMP_AND4 , 
	COMP_OR2 , 
	COMP_OR4 , 
	COMP_ADDER , 
	COMP_MULTIPLIER , 
	COMP_SHIFT_LEFT , 
	COMP_SHIFT_RIGHT , 
	COMP_MUX2 , 
	COMP_DEMUX2 , 
	COMP_FLIP_FLOP , 
	HW_CONNECTION , 
	COMP_MEMORY , 
	COMP_CLOCK , 
	COMP_COUNT , 
};



// One simulated hardware component: a function code plus the circuit addresses
// it reads and writes. How many of the 8 input/output slots are in use is given
// by the matching HardwareInstrInfo (see get_hw_instr_info()).
struct HardwareInstr
{
	// Functional Information
	u32 function;
	
	CircuitAddress inputs[8];
	CircuitAddress outputs[8];
	
	// Debug Information
	//u32 input_counter;
	//u32 output_counter;
};

// Static description of a function code; one row per code in global_hw_instr_bank.
struct HardwareInstrInfo
{
	u32 function;          // function code this row describes
	char * function_name;  // printable name
	u32 input_count;       // number of used entries in HardwareInstr::inputs
	u32 output_count;      // number of used entries in HardwareInstr::outputs
	u32 gates_propagation; // NOTE: makes distiction between combinational logic and flip_flops.
	// Used for the levelising algorithm.
	
	u8 switch_power_per_bit;
	u8 static_power_per_bit;
};

// Table of per-function information, searched by get_hw_instr_info().
// Columns: function , name , input_count , output_count , gates_propagation , 
//          switch_power_per_bit , static_power_per_bit.
// NOTE: every function code must appear exactly once; a missing code makes
// get_hw_instr_info() return 0 for it.
// NOTE(review): "MULTIPLER" is spelt as in the original table; it is left untouched
// in case the name is matched elsewhere.
HardwareInstrInfo global_hw_instr_bank[COMP_COUNT] = {
	{COMP_NO_OP , "" , 0 , 0 , 0 , 0 , 0} , 
	{UPDATE_IO , "UPDATE_IO" , 0 , 0 , 0 , 0 , 0} , 
	{COMP_CIRCUIT_STEADY_STATE , "COMP_CIRCUIT_STEADY_STATE" , 1 , 1 , 0 , 0 , 0} , 
	{COMP_TRANS_GATE , "TRANS_GATE" , 2 , 1 , 0 , 2 , 1} , 
	{COMP_NOT , "NOT" , 1 , 1 , 0 , 2 , 1} , 
	{COMP_AND2 , "AND2" , 2 , 1 , 0 , 2 , 1} , 
	{COMP_AND4 , "AND4" , 4 , 1 , 0 , 2 , 1} , 
	{COMP_OR2 , "OR2" , 2 , 1 , 0 , 2 , 1} , 
	// NOTE: this row was previously tagged COMP_OR2, which left COMP_OR4 without an entry.
	{COMP_OR4 , "OR4" , 4 , 1 , 0 , 2 , 1} , 
	{COMP_ADDER , "ADDER" , 2 , 1 , 0 , 4 , 1} , 
	{COMP_MULTIPLIER , "MULTIPLER" , 2 , 1 , 0 , 8 , 1} , 
	{COMP_SHIFT_LEFT , "SHIFT_LEFT" , 2 , 1 , 0 , 4 , 1} , 
	{COMP_SHIFT_RIGHT , "SHIFT_RIGHT" , 2 , 1 , 0 , 4 , 1} , 
	{COMP_MUX2 , "MUX2" , 3 , 1 , 0 , 2 , 1} , 
	{COMP_DEMUX2 , "DE-MUX2" , 2 , 2 , 0 , 2 , 1} , 
	{COMP_FLIP_FLOP , "FF" , 3 , 1 , 1 , 2 , 1} , 
	{HW_CONNECTION , "CONNECTION" , 1 , 1 , 0 , 0 , 0} , 
	{COMP_MEMORY , "COMP_MEMORY" , 3 , 1 , 0 , 2 , 1} , 
	{COMP_CLOCK , "COMP_CLOCK" , 1 , 1 , 0 , 2 , 1}
};

// Number of rows in global_hw_instr_bank.
u32 global_hw_instr_bank_size = array_size(global_hw_instr_bank);

// Polarity codes. NOTE(review): presumably the values of CircuitMemoryInfo::polarity;
// no use is visible in this part of the file.
enum
{
	NO_POLARITY , 
	INPUT_POLARITY , 
	OUTPUT_POLARITY , 
	INOUT_POLARITY
};

// Names a region of circuit memory. Entries are looked up either by name or by
// an overlapping address (see the get_circuit_mem_info() overloads).
struct CircuitMemoryInfo
{
	char name[STR_NAME_SIZE];
	CircuitAddress addr;
	u32 polarity;
};


// Trace id bounds followed by the event type codes. The event types continue
// counting after MAX_TRACE_ID, so they never collide with the 1..1024 id range.
// CIRCUIT_SIGNAL_EVENT and STEADY_STATE_EVENT are stored in CircuitEvent::type.
enum
{
	
	MIN_TRACE_ID = 1 , 
	MAX_TRACE_ID = 1024 , 
	CIRCUIT_SIGNAL_EVENT ,
	STEADY_STATE_EVENT , 
};

// One entry of the event ring in EventBuffer. 'type' and 'time' are set by
// push_circuit_event(); the remaining fields are filled in by push_signal_event().
struct CircuitEvent
{
	
	u32 type;
	i32 time;
	CircuitAddress source;
	CircuitAddress dest;
	
	CircuitValue result_value;
	CircuitValue original_value;
	CircuitValue input_value;
	u32 instr_func;
};

// One entry of the power ring in EventBuffer, recorded by push_power():
// the value at 'addr' before and after function 'func' ran at 'time'.
struct CircuitPowerEvent
{
	i32 time;
	u32 func;
	CircuitAddress addr;
	CircuitValue before_value;
	CircuitValue after_value;
};

// One entry of the fanout ring in EventBuffer, recorded by push_fanout().
struct CircuitFanoutEvent
{
	CircuitAddress addr;
};

// Marks a span of each log (events, power, fanout) together with a pass index and time.
// NOTE(review): presumably one mark per simulation pass; the code that fills it in
// is not in this part of the file.
struct CircuitEventTimeMark
{
	i32 pass_index;
	i32 time;
	
	u32 steady_state;
	
	u32 event_start_index;
	u32 event_end_index;
	
	u32 power_start_index;
	u32 power_end_index;
	
	u32 fanout_start_index;
	u32 fanout_end_index;
	
};

// Log storage for the simulator. The event, power and fanout arrays are used as
// ring buffers by push_circuit_event(), push_power() and push_fanout(): new entries
// are written at the end index, which wraps at the capacity, and the start index is
// advanced when the end index catches up with it (the oldest entry is dropped).
// NOTE(review): the time mark fields are presumably managed the same way; their
// users are not in this part of the file.
struct EventBuffer
{
	CircuitEvent * events;
	u32 event_capacity;
	u32 event_start_index;
	u32 event_end_index;
	
	CircuitPowerEvent * power_events;
	u32 power_capacity;
	u32 power_start_index;
	u32 power_end_index;
	
	CircuitFanoutEvent * fanout_events;
	u32 fanout_capacity;
	u32 fanout_start_index;
	u32 fanout_end_index;
	
	CircuitEventTimeMark * time_marks;
	u32 time_mark_capacity;
	u32 time_mark_start_index;
	u32 time_mark_end_index;
	
	CircuitEventTimeMark * curr_mark;
};

// One port sample: a value and the time it belongs to. Port buffers are arrays
// of PortVec that are copied to/from the port file as raw bytes.
struct PortVec
{
	u32 circuit_value;
	i32 time;
};

// A file-backed circuit port.
struct CircuitPort
{
	u32 id;                // looked up by get_port_()
	u32 direction;         // non-zero: samples are written to the file; zero: samples are read from it
	CircuitAddress addr;   // port data bits
	CircuitAddress clock;  // the port latches on a rising edge of this signal...
	CircuitAddress enable; // ...while this signal is non-zero (see update_ports())
	
	PortVec last_recorded_vec; // last sample read from / written to the port
	u32 min;               // bounds checked against the last sample in check_port_bounds()
	u32 max;
	u32 sign;              // non-zero: value and bounds are treated as signed
	
	u8 * mem;              // sample buffer (array of PortVec)
	u32 mem_byte_size;
	u32 index;             // current sample index into 'mem'
	
	i32 total_time;        // time of the most recent latch
	u32 trigger_count;     // number of latches so far
	
	char filename[STR_PATH_SIZE];
	File file;
	
};

// A stimulus: 'value' is written to 'addr' when the simulation time equals 'time'
// (see set_input_vectors()).
struct CircuitInputVector
{
	CircuitAddress addr;
	CircuitValue value;
	i32 time;
};



// A snapshot of circuit memory.
struct CircuitState
{
	u8 * mem;
	u32 size; // in bytes (read/write_circuit_value() multiply by 8 to get the bit limit)
};

// A path of circuit addresses plus a range of result ids.
// NOTE(review): presumably produced by the signal backtrace; its users are not
// in this part of the file.
struct SignalTrace
{
	CircuitAddress * path;
	u32 path_length;
	
	u32 start_result_id;
	u32 end_result_id;
};

// Result slot of a signal trace: a time and a flag saying whether it is available.
// NOTE(review): exact meaning of 'available' to be confirmed against the users.
struct SignalTraceResult
{
	i32 time;
	u32 available;
};

// Time specification attached to a test vector expression.
// NOTE(review): 'id' is presumably a trace id (MIN_TRACE_ID..MAX_TRACE_ID) - confirm.
struct TimeSpec
{
	i32 const_delay;
	u32 id;
	u32 instance;
};

// A logged signal sample: a value and its time.
struct SignalLog
{
	CircuitValue value;
	i32 time;
};

// A node of a test vector expression tree: an op code with optional left/right
// operands, plus the address, value and time specification the node refers to.
// NOTE(review): 'op_code' presumably takes the EXPR_* values - confirm.
struct TestVectorExpr
{
	
	u32 op_code;
	TestVectorExpr * lhs;
	TestVectorExpr * rhs;
	
	CircuitAddress addr;
	CircuitValue value;
	
	TimeSpec time_spec;
	
	//u32 backlog_buffer_size;
	//u32 backlog_start_index;
	//u32 backlog_end_index;
	//SignalLog * backlog_buffer;
	
	/*
	// NOTE: Time relative to present time.
	i32 lhs_time; 
	i32 rhs_time; 
		
	u32 lhs_buffer_size;
	u32 rhs_buffer_size;
		
	u32 lhs_buffer_byte_width;
	u32 rhs_buffer_byte_width;
		
	u8 * lhs_buffer;
	u8 * rhs_buffer;
	*/
};

// Maximum number of consequence / antecedent expressions per test vector.
#define TEST_VECTOR_CONSEQ_CAPACITY 8
#define TEST_VECTOR_ANTEC_CAPACITY 8

// A test: a pool of expressions plus pointers to the ones acting as
// consequences and antecedents.
struct TestVector
{
	
	// NOTE: consequence conditions (conseq) must be true given that the antecedent conditions (antec) are true.
	TestVectorExpr * exprs;
	TestVectorExpr * conseq[TEST_VECTOR_CONSEQ_CAPACITY];
	TestVectorExpr * antec[TEST_VECTOR_ANTEC_CAPACITY];
	
	u32 expr_count;
	u32 conseq_count;
	u32 antec_count;
};

// Expression op codes. EXPR_OP_COUNT is the number of op codes and sizes
// EXPR_OP_CODE_NAMES, which lists the names in this order.
enum
{
	EXPR_NOTHING , 
	EXPR_PASS  , 
	EXPR_OPERAND  , 
	EXPR_MEMORY  , 
	EXPR_POS_EDGE , 
	EXPR_EQL , 
	EXPR_ADD , 
	EXPR_MULT , 
	EXPR_MOVING_BACKTRACE , 
	EXPR_OP_COUNT
};

// Printable name of each expression op code, indexed by the EXPR_* value.
// NOTE: must be kept in the same order as the EXPR_* enum.
char * EXPR_OP_CODE_NAMES[EXPR_OP_COUNT] = 
{
	"EXPR_NOTHING" , 
	"EXPR_PASS"  , 
	"EXPR_OPERAND"  , 
	"EXPR_MEMORY"  , 
	"EXPR_POS_EDGE" , 
	"EXPR_EQL" , 
	"EXPR_ADD" , 
	"EXPR_MULT" , 
	"EXPR_MOVING_BACKTRACE"
};

// Result of evaluating one expression node; links to the results of its operands.
// NOTE(review): presumably mirrors the TestVectorExpr tree it was evaluated from - confirm.
struct EvalExprResult
{
	
	u32 op_code;
	u32 available;
	CircuitValue value;
	CircuitAddress addr;
	i32 time;
	
	EvalExprResult * lhs_result;
	EvalExprResult * rhs_result;
};

// Outcome of a test vector: pass/active flags, a pool of expression results and
// pointers to the results of the antecedent and consequence expressions.
struct TestVectorResult
{
	
	u32 passed;
	u32 active;
	EvalExprResult * eval_results;
	EvalExprResult * antec_results[TEST_VECTOR_ANTEC_CAPACITY];
	EvalExprResult * conseq_results[TEST_VECTOR_CONSEQ_CAPACITY];
	
	u32 eval_result_count;
	u32 conseq_count;
	u32 antec_count;
};













// Finds the global_hw_instr_bank row whose function code is 'target_function'.
// Returns 0 when no row matches.
HardwareInstrInfo * get_hw_instr_info(u32 target_function)
{
	HardwareInstrInfo * entry = global_hw_instr_bank;
	HardwareInstrInfo * bank_end = global_hw_instr_bank + global_hw_instr_bank_size;
	
	while(entry != bank_end)
	{
		if(entry -> function == target_function)
		{
			return(entry);
		}
		
		entry++;
	}
	
	return(0);
}



// Number of bits covered by 'addr'.
inline 
u32 width_of(CircuitAddress addr) 
{
	u32 bit_count = addr.max - addr.min;
	return(bit_count);
}

// Number of meaningful bits in 'value'.
inline 
u32 width_of(CircuitValue value) 
{
	u32 bit_count = value.width;
	return(bit_count);
}

// Number of bytes needed to hold the bits covered by 'addr' (rounded up).
inline
u32 get_byte_width(CircuitAddress addr)
{
	u32 bit_count = addr.max - addr.min;
	return((bit_count + 7)/8);
}

// Number of bytes needed to hold 'bit_width' bits (rounded up).
inline
u32 get_byte_width(u32 bit_width)
{
	return((bit_width + 7)/8);
}

// Two addresses are equal when both bounds match.
inline
u32 operator==(CircuitAddress lhs , CircuitAddress rhs)
{
	u32 same_min = (lhs.min == rhs.min);
	u32 same_max = (lhs.max == rhs.max);
	return(same_min && same_max);
}

// Two addresses differ when either bound differs.
inline
u32 operator!=(CircuitAddress lhs , CircuitAddress rhs)
{
	u32 min_differs = (lhs.min != rhs.min);
	u32 max_differs = (lhs.max != rhs.max);
	return(min_differs || max_differs);
}

// Non-zero when both bounds of 'lhs' lie within rhs.min..rhs.max (both ends inclusive).
inline 
u32 in_range(CircuitAddress lhs , CircuitAddress rhs)
{
	u32 min_inside = (rhs.min <= lhs.min && lhs.min <= rhs.max);
	u32 max_inside = (rhs.min <= lhs.max && lhs.max <= rhs.max);
	return(min_inside && max_inside);
}

// Non-zero when 'lhs' and 'rhs' share at least one bit.
inline
u32 addr_overlap(CircuitAddress lhs , CircuitAddress rhs)
{
	// The shared range starts at the larger 'min' and ends at the smaller 'max';
	// it is non-empty only when the start lies strictly below the end.
	u32 shared_first = max(lhs.min , rhs.min);
	u32 shared_end = min(lhs.max , rhs.max);
	
	return(shared_first < shared_end);
}

// Address of the single bit 'index' inside 'addr' (index 0 is addr.min).
inline 
CircuitAddress bit_sel(CircuitAddress addr , u32 index)
{
	assert(index < addr.max - addr.min);
	
	u32 first_bit = addr.min + index;
	CircuitAddress result = {first_bit , first_bit + 1};
	
	return(result);
}

// Value (0 or 1) of bit 'index' of 'value'.
inline 
u32 bit_sel(CircuitValue value , u32 index)
{
	assert(index < value.width);
	
	u32 shifted = value.v >> index;
	return(shifted & 0x1);
}


// Sub-range of 'addr': bits rel_min (inclusive) to rel_max (exclusive), both
// relative to addr.min.
inline 
CircuitAddress sub_addr(CircuitAddress addr , u32 rel_min , u32 rel_max)
{
	u32 bit_count = width_of(addr);
	
	// NOTE: the bounds are unsigned, so only the upper limits need checking.
	assert(rel_min < bit_count);
	assert(rel_max <= bit_count);
	assert(rel_min <= rel_max);
	
	CircuitAddress result = {};
	result.min = addr.min + rel_min;
	result.max = addr.min + rel_max;
	
	return(result);
}



// NOTE: make a mask of the bits starting at bit 'min' (inclusive) and ending at bit 'max' (exclusive).
// E.g. get_bit_bound_mask(4 , 8) == 0x000000F0 and get_bit_bound_mask(8 , 32) == 0xFFFFFF00.
//
// Each bound is turned into a mask of all bits below it and the two are XOR'd together.
// NOTE: a bound of 32 or more means "every bit". It is handled explicitly because shifting
// a 32-bit value by 32 or more is undefined; the shl instruction only uses the 5 lsb of the
// shift amount, so the shift cannot be relied upon to produce an all-ones mask.

u32 get_bit_bound_mask(u32 min , u32 max)
{
	
	u32 min_mask = (min < 32) ? (((u32)1 << min) - 1) : 0xFFFFFFFF;
	u32 max_mask = (max < 32) ? (((u32)1 << max) - 1) : 0xFFFFFFFF;
	u32 mask = min_mask ^ max_mask;
	
	return(mask);
}


// Smallest/largest values representable in 'width' bits (1 <= width <= 32).
// NOTE: the shifts are done on unsigned operands wide enough for width == 32, where
// '1 << width' and '1 << (width-1)' on a plain int would overflow. The argument is
// parenthesised so expressions can be passed safely.
#define MIN_UNSIGNED_VALUE(width) 0
#define MAX_UNSIGNED_VALUE(width) ((unsigned)((1ull << (width)) - 1ull))

// NOTE: the minimum is written as -(max) - 1 so that no intermediate value overflows.
#define MIN_SIGNED_VALUE(width) (-(int)((1u << ((width) - 1)) - 1u) - 1)
#define MAX_SIGNED_VALUE(width) ((int)((1u << ((width) - 1)) - 1u))

// Packs an integer into a CircuitValue of 'width' bits.
// 'sign' non-zero: 'value' is interpreted as a signed number and must fit the signed range
// of 'width' bits; otherwise it must fit the unsigned range. The stored bits are 'value'
// masked to 'width' bits.
CircuitValue int_to_cv(u32 value , u32 width , u32 sign)
{
	
	assert(width);
	
	if(sign)
	{
		i32 s_value = value;
		i32 lowest = MIN_SIGNED_VALUE(width);
		i32 highest = MAX_SIGNED_VALUE(width);
		
		assert(lowest <= s_value && s_value <= highest);
		
		// NOTE: a negative value must have the sign bit of the target width set.
		assert(s_value >= 0 || (s_value & (1 << (width-1))));
	}
	
	else
	{
		u32 lowest = MIN_UNSIGNED_VALUE(width);
		u32 highest = MAX_UNSIGNED_VALUE(width);
		
		assert(lowest <= value && value <= highest);
	}
	
	u32 width_mask = get_bit_bound_mask(0 , width);
	CircuitValue result = {value & width_mask , width};
	
	return(result);
}


// Unpacks a CircuitValue into an integer. When 'sign' is non-zero and the top bit of
// the value (bit width-1) is set, the value is sign-extended to 32 bits; otherwise
// the raw bits are returned.
u32 cv_to_int(CircuitValue cv , u32 sign)
{
	
	assert(cv.width);
	
	u32 bits = cv.v;
	
	if(sign && (bits & (1 << (cv.width-1))))
	{
		// NOTE: fill every bit above the value with ones.
		bits |= get_bit_bound_mask(cv.width , 32);
	}
	
	return(bits);
}

// Returns 'dest' with src.width bits, starting at bit 'offset', replaced by 'src'.
// NOTE(review): 'mask_ptr' is neither read nor written here - confirm whether callers
// expect the mask to be returned through it.
CircuitValue set_bits(CircuitValue dest , CircuitValue src , u32 offset , u32 * mask_ptr)
{
	
	u32 first_bit = offset;
	u32 end_bit = offset + src.width;
	
	assert(first_bit < 32);
	assert(end_bit <= 32);
	assert(end_bit-first_bit <= 32);
	
	assert(src.width <= dest.width);
	assert(first_bit < dest.width);
	assert(end_bit <= dest.width);
	
	u32 field_mask = get_bit_bound_mask(first_bit , end_bit);
	u32 kept_bits = dest.v & ~field_mask;
	u32 new_bits = (src.v << first_bit) & field_mask;
	
	dest.v = kept_bits | new_bits;
	return(dest);
	
}



// Finds the first entry of 'vt' (of 'count' entries) whose address shares at least
// one bit with 'addr'. Returns 0 when there is none.
CircuitMemoryInfo * get_circuit_mem_info(
										 CircuitMemoryInfo * vt , u32 count , CircuitAddress addr)
{
	
	CircuitMemoryInfo * vt_end = vt + count;
	
	for(CircuitMemoryInfo * entry = vt; entry < vt_end; entry++)
	{
		if(addr_overlap(entry -> addr , addr))
		{
			return(entry);
		}
	}
	
	return(0);
}

// Finds the first of 'count' addresses in 'addrs' that shares at least one bit with
// 'target_addr'. Returns 0 when there is none.
CircuitAddress * find_addr(
						   CircuitAddress target_addr , 
						   CircuitAddress * addrs , u32 count)
{
	CircuitAddress * addrs_end = addrs + count;
	
	for(CircuitAddress * candidate = addrs; candidate < addrs_end; candidate++)
	{
		if(addr_overlap(*candidate , target_addr))
		{
			return(candidate);
		}
	}
	
	return(0);
}



// Finds the first entry of 'vt' (of 'count' entries) whose name equals 'name'.
// Returns 0 when there is none.
CircuitMemoryInfo * get_circuit_mem_info(CircuitMemoryInfo * vt , u32 count , char * name)
{
	
	CircuitMemoryInfo * vt_end = vt + count;
	
	for(CircuitMemoryInfo * entry = vt; entry < vt_end; entry++)
	{
		if(str_eql(entry -> name , name))
		{
			return(entry);
		}
	}
	
	return(0);
}

// Returns 1 when any of the first 'lhs_input_count' inputs of 'lhs' lies within
// (see in_range()) any of the first 'rhs_output_count' outputs of 'rhs'; 0 otherwise.
inline
u32 takes_input_from(HardwareInstr * lhs , HardwareInstr * rhs , u32 lhs_input_count , u32 rhs_output_count)
{
	
	for(u32 in = 0; in < lhs_input_count; in++)
	{
		CircuitAddress input = lhs -> inputs[in];
		
		for(u32 out = 0; out < rhs_output_count; out++)
		{
			if(in_range(input , rhs -> outputs[out]))
			{
				return(1);
			}
		}
	}
	
	return(0);
	
}
// Exchanges the instructions at 'pivot_index' and 'swap_index'.
// NOTE: the dependency and gating entries are only copied from the pivot slot to the
// swap slot; the pivot slot keeps its old entries.
void swap_out(HardwareInstr * instrs , u32 * dep_array , u32 * gating_array , u32 pivot_index , u32 swap_index)
{
	HardwareInstr pivot_instr = instrs[pivot_index];
	instrs[pivot_index] = instrs[swap_index];
	instrs[swap_index] = pivot_instr;
	
	gating_array[swap_index] = gating_array[pivot_index];
	dep_array[swap_index] = dep_array[pivot_index];
}


// NOTE: Sorts the instructions in levelisation order. After the sort, instructions at the start of the
// list are inputs, and instructions at the end are outputs. This function considers loops, and cuts the loop
// at the first detection of the loop.

// Executing the instructions in levelisation order means that the signal steady state is achieved in one pass.

// The unsorted window is [min_pivot_index , max_pivot_index). Each round:
//  1. counts, for every instruction in the window, how many other instructions in the window it
//     takes input from (dep_array);
//  2. if every instruction has the same count and more than one remains, nothing can be picked,
//     so the window is cut by fixing the first instruction in place;
//  3. otherwise every instruction with the minimum count leaves the window: to the back if it
//     gates propagation (flip-flops), to the front if it does not.
// 'arena' provides the scratch arrays; they are released before returning.

void sort_hardware_instructions(HardwareInstr * instrs , u32 instr_count , Mem * arena)
{
	
	u32 * dep_array = mem_push(arena ,  u32 , sizeof(u32)*instr_count);
	u32 * gating_array = mem_push(arena , u32 , sizeof(u32)*instr_count);
	
	u32 min_pivot_index = 0;
	u32 max_pivot_index = instr_count;
	u32 min_dep = MAX_U32;
	u32 max_dep = 0;
	
	while(min_pivot_index < max_pivot_index)
	{
		
		mem_set(dep_array , 0 , instr_count*sizeof(u32));
		min_dep = MAX_U32;
		max_dep = 0;
		
		for(u32 i = min_pivot_index; i < max_pivot_index; i++)
		{
			
			HardwareInstr * curr_instr = instrs + i;
			HardwareInstrInfo * curr_info = get_hw_instr_info(curr_instr -> function);
			u32 input_count = curr_info -> input_count;
			
			// NOTE: set once per instruction, outside the comparison loop, so the entry is
			// also valid when the window holds a single instruction.
			gating_array[i] = curr_info -> gates_propagation;
			
			for(u32 j = min_pivot_index; j < max_pivot_index; j++)
			{
				if(i == j) continue;
				
				HardwareInstr * comp_instr = instrs + j;
				HardwareInstrInfo * comp_info = get_hw_instr_info(comp_instr -> function);
				u32 output_count = comp_info -> output_count;
				
				if(takes_input_from(curr_instr , comp_instr , input_count , output_count))
				{
					dep_array[i]++;
				}
			}
		}
		
		for(u32 i = min_pivot_index; i < max_pivot_index; i++)
		{
			min_dep = min_dep < dep_array[i] ? min_dep : dep_array[i];
			max_dep = max_dep > dep_array[i] ? max_dep : dep_array[i];
		}
		
		// NOTE: loop condition.
		if(min_dep == max_dep && max_pivot_index-min_pivot_index > 1)
		{
			min_pivot_index++;
		}
		
		else
		{
			for(u32 i = min_pivot_index; i < max_pivot_index; i++)
			{
				
				if(dep_array[i] == min_dep)
				{
					if(gating_array[i])
					{
						max_pivot_index--;
						swap_out(instrs , dep_array , gating_array , max_pivot_index , i);
					}
					
					else
					{
						swap_out(instrs , dep_array , gating_array , min_pivot_index , i);
						min_pivot_index++;
					}
				}
			}
		}
	}
	
	mem_pop(arena , dep_array);
	
}


// Fills 'mem' with a counting pattern: byte i receives the low 8 bits of i.
inline
void initialise_circuit_memory(u8 * mem , u32 size)
{
	u32 byte_index = 0;
	
	while(byte_index < size)
	{
		mem[byte_index] = (u8)byte_index;
		byte_index++;
	}
}


// Clears 'mem_size' bytes of circuit memory to zero.
// NOTE: 'signals', 'infos' and 'info_count' are currently unused; the disabled code that
// logged a signal event per memory region no longer matched push_signal_event() or
// CircuitMemoryInfo and has been dropped.
void reset_circuit_mem(u8 * mem , u32 mem_size , EventBuffer * signals , CircuitMemoryInfo * infos , u32 info_count)
{
	mem_set(mem , 0 , mem_size);
}


// Reads the bits addr.min (inclusive) to addr.max (exclusive) from 'mem' and returns them,
// right-aligned, as a CircuitValue of that width. At most 32 bits can be read at once.
//
// NOTE: the field may start at any bit of its first byte, so up to 7 + 32 = 39 bits have to
// be considered. The mask and the staging value are therefore 64 bits wide; a 32-bit mask
// would cut off the top bits of fields that do not start on a byte boundary.
// NOTE: only the bytes that hold part of the field are read, so the read never goes past
// the last byte of the field.
// NOTE: assumes a little-endian host (byte 0 of the staging value is its least significant byte).
CircuitValue read_circuit_memory_(u8 * mem , CircuitAddress addr)
{
	
	CircuitValue result = {};
	
	u32 bit_width = addr.max - addr.min;
	assert(bit_width <= 32);
	
	u32 min_byte = addr.min / 8;
	
	// NOTE: bit positions relative to the start of the first byte. rel_min is 0..7, rel_max is 0..39.
	u32 rel_min = addr.min - min_byte*8;
	u32 rel_max = addr.max - min_byte*8;
	
	// NOTE: mask of all bits below each bound; the XOR leaves the bits rel_min..rel_max-1.
	u64 min_mask = ((u64)1 << rel_min) - 1;
	u64 max_mask = ((u64)1 << rel_max) - 1;
	u64 mask = min_mask ^ max_mask;
	
	u32 byte_count = (rel_max + 7)/8; // NOTE: at most 5.
	u8 * src = mem + min_byte;
	
	u64 src_value = 0;
	mem_copy(&src_value , src , byte_count);
	
	result.v = (u32)((src_value & mask) >> rel_min);
	result.width = bit_width;
	
	return(result);
}

// Writes the low (addr.max - addr.min) bits of 'value' into 'mem' at bits addr.min (inclusive)
// to addr.max (exclusive). Bits of 'mem' outside that range are preserved. At most 32 bits can
// be written at once.
//
// NOTE: the field may start at any bit of its first byte, so up to 7 + 32 = 39 bits have to
// be considered. The value is widened to 64 bits before it is shifted into place and the mask
// is 64 bits wide; doing either in 32 bits would lose the top bits of fields that do not
// start on a byte boundary.
// NOTE: only the bytes that hold part of the field are read and written back, so memory
// beyond the last byte of the field is never touched.
// NOTE: assumes a little-endian host (byte 0 of the staging value is its least significant byte).
void write_circuit_memory_(u8 * mem , CircuitAddress addr , CircuitValue value)
{
	
	u32 bit_width = addr.max - addr.min;
	assert(bit_width <= 32);
	
	u32 min_byte = addr.min / 8;
	
	// NOTE: bit positions relative to the start of the first byte. rel_min is 0..7, rel_max is 0..39.
	u32 rel_min = addr.min - min_byte*8;
	u32 rel_max = addr.max - min_byte*8;
	
	// NOTE: mask of all bits below each bound; the XOR leaves the bits rel_min..rel_max-1.
	u64 min_mask = ((u64)1 << rel_min) - 1;
	u64 max_mask = ((u64)1 << rel_max) - 1;
	u64 mask = min_mask ^ max_mask;
	
	u32 byte_count = (rel_max + 7)/8; // NOTE: at most 5.
	u8 * dest = mem + min_byte;
	
	u64 src_value = (u64)value.v << rel_min;
	u64 dest_value = 0;
	mem_copy(&dest_value , dest , byte_count);
	
	dest_value = (src_value & mask) | (dest_value & ~mask);
	mem_copy(dest , &dest_value , byte_count);
}

// Bounds-checked read of 'addr' from the memory of 'state'.
CircuitValue read_circuit_value(CircuitState * state , CircuitAddress addr)
{
	
	// NOTE: state -> size is in bytes; addresses are in bits.
	u32 bit_limit = state -> size*8;
	assert(addr.min <= bit_limit && addr.max <= bit_limit);
	
	return(read_circuit_memory_(state -> mem , addr));
	
}


// Bounds-checked write of 'value' to 'addr' in the memory of 'state'.
void write_circuit_value(CircuitState * state , CircuitAddress addr , CircuitValue value)
{
	
	// NOTE: state -> size is in bytes; addresses are in bits.
	u32 bit_limit = state -> size*8;
	assert(addr.min <= bit_limit && addr.max <= bit_limit);
	
	write_circuit_memory_(state -> mem , addr , value);
}


// Appends an event of the given type and time to the event ring of 'buffer' and returns it
// so the caller can fill in the remaining fields. When the ring is full the oldest event
// is dropped.
CircuitEvent * push_circuit_event(u32 type , i32 time , EventBuffer * buffer)
{
	
	u32 capacity = buffer -> event_capacity;
	u32 head = buffer -> event_start_index;
	u32 tail = buffer -> event_end_index;
	
	CircuitEvent * slot = buffer -> events + tail;
	slot -> type = type;
	slot -> time = time;
	
	// NOTE: advance the end with wrap-around; if it catches up with the start, move the start on.
	tail++;
	if(tail >= capacity) tail = 0;
	
	if(head == tail)
	{
		head++;
		if(head >= capacity) head = 0;
	}
	
	buffer -> event_start_index = head;
	buffer -> event_end_index = tail;
	
	return(slot);
}


// Logs a CIRCUIT_SIGNAL_EVENT: function 'instr_func' took 'input_value' from 'source' and
// changed 'dest' from 'original_value' to 'result_value' at 'curr_time'.
// Returns the logged event.
CircuitEvent * push_signal_event(
								 CircuitAddress source , CircuitAddress dest , 
								 CircuitValue input_value , CircuitValue original_value , CircuitValue result_value , 
								 i32 curr_time , u32 instr_func , EventBuffer * buffer)
{
	
	CircuitEvent * signal = push_circuit_event(CIRCUIT_SIGNAL_EVENT , curr_time , buffer);
	
	signal -> instr_func = instr_func;
	signal -> source = source;
	signal -> dest = dest;
	signal -> input_value = input_value;
	signal -> original_value = original_value;
	signal -> result_value = result_value;
	
	return(signal);
}


// Appends a power event to the power ring of 'buffer' and returns it. When the ring is
// full the oldest entry is dropped.
CircuitPowerEvent * push_power(
							   CircuitAddress addr , CircuitValue before_value , CircuitValue after_value ,
							   i32 curr_time , u32 func , EventBuffer * buffer)
{
	
	u32 capacity = buffer -> power_capacity;
	u32 head = buffer -> power_start_index;
	u32 tail = buffer -> power_end_index;
	
	CircuitPowerEvent * slot = buffer -> power_events + tail;
	slot -> func = func;
	slot -> time = curr_time;
	slot -> addr = addr;
	slot -> before_value = before_value;
	slot -> after_value = after_value;
	
	// NOTE: advance the end with wrap-around; if it catches up with the start, move the start on.
	tail++;
	if(tail >= capacity) tail = 0;
	
	if(head == tail)
	{
		head++;
		if(head >= capacity) head = 0;
	}
	
	buffer -> power_start_index = head;
	buffer -> power_end_index = tail;
	
	return(slot);
}


// Appends a fanout event for 'addr' to the fanout ring of 'buffer' and returns it. When the
// ring is full the oldest entry is dropped.
CircuitFanoutEvent * push_fanout(CircuitAddress addr , EventBuffer * buffer)
{
	
	u32 capacity = buffer -> fanout_capacity;
	u32 head = buffer -> fanout_start_index;
	u32 tail = buffer -> fanout_end_index;
	
	CircuitFanoutEvent * slot = buffer -> fanout_events + tail;
	slot -> addr = addr;
	
	// NOTE: advance the end with wrap-around; if it catches up with the start, move the start on.
	tail++;
	if(tail >= capacity) tail = 0;
	
	if(head == tail)
	{
		head++;
		if(head >= capacity) head = 0;
	}
	
	buffer -> fanout_start_index = head;
	buffer -> fanout_end_index = tail;
	
	return(slot);
}


// Non-zero when the value at 'addr' was zero in 'prev_state' and is non-zero in 'curr_state'.
inline
u32 is_pos_edge(
				CircuitAddress addr , 
				CircuitState * prev_state , CircuitState * curr_state)
{
	
	CircuitValue before = read_circuit_value(prev_state , addr);
	CircuitValue after = read_circuit_value(curr_state , addr);
	
	u32 was_low = (before.v == 0);
	u32 is_high = (after.v != 0);
	
	return(was_low && is_high);
}

// Non-zero when the value at 'addr' was non-zero in 'prev_state' and is zero in 'curr_state'.
inline
u32 is_neg_edge(
				CircuitAddress addr , 
				CircuitState * prev_state , CircuitState * curr_state)
{
	
	CircuitValue before = read_circuit_value(prev_state , addr);
	CircuitValue after = read_circuit_value(curr_state , addr);
	
	u32 was_high = (before.v != 0);
	u32 is_low = (after.v == 0);
	
	return(was_high && is_low);
}

// Applies every input vector scheduled for exactly 'time': its value is written to the
// previous, current and next state, and an UPDATE_IO signal event is logged with the value
// the current state held before the write.
void set_input_vectors(
					   CircuitInputVector * inputs , u32 input_count , 
					   CircuitState * prev_state , CircuitState * curr_state , CircuitState * next_state , 
					   i32 time , EventBuffer * buffer)
{
	
	CircuitInputVector * inputs_end = inputs + input_count;
	
	for(CircuitInputVector * vec = inputs; vec < inputs_end; vec++)
	{
		if(vec -> time != time) continue;
		
		CircuitAddress target = vec -> addr;
		CircuitValue stimulus = vec -> value;
		
		// NOTE: must be read before the states are overwritten.
		CircuitValue old_value = read_circuit_value(curr_state , target);
		
		write_circuit_value(prev_state , target , stimulus);
		write_circuit_value(curr_state , target , stimulus);
		write_circuit_value(next_state , target , stimulus);
		
		push_signal_event(
						  CircuitAddress{} , target , 
						  stimulus , old_value , stimulus , 
						  time , UPDATE_IO , buffer
						  );
	}
}


// Opens the file behind every port and allocates its sample buffer from 'arena'.
// Ports with a non-zero direction are opened for writing and get a buffer of 16 samples.
// Other ports are opened for reading and, if the file could be opened, the whole file is
// loaded into the buffer.
// NOTE: when a read port's file cannot be opened, its buffer fields are left as they were.
void open_ports(CircuitPort * ports , u32 port_count , Mem * arena)
{
	
	CircuitPort * ports_end = ports + port_count;
	
	for(CircuitPort * port = ports; port < ports_end; port++)
	{
		File file = {};
		
		if(port -> direction)
		{
			file = open_file(port -> filename , 0 , WRITE_MODE);
			
			u32 buffer_size = sizeof(PortVec)*16;
			port -> mem = mem_push(arena , u8 , buffer_size);
			port -> mem_byte_size = buffer_size;
			port -> index = 0;
		}
		
		else
		{
			file = open_file(port -> filename , 0 , READ_MODE);
			
			if(file.platform_data)
			{
				u32 file_size = get_file_size_by_handle(file.platform_data);
				port -> mem = mem_push(arena , u8 , file_size);
				port -> mem_byte_size = file_size;
				port -> index = 0;
				
				read(&file , port -> mem , port -> mem_byte_size , 0);
			}
		}
		
		port -> file = file;
	}
	
}

// Closes the file of every port.
void close_ports(CircuitPort * ports , u32 port_count)
{
	
	CircuitPort * ports_end = ports + port_count;
	
	for(CircuitPort * port = ports; port < ports_end; port++)
	{
		close_file(&port -> file);
	}
	
}

// Finds the first port whose id equals 'id'. Returns 0 when there is none.
CircuitPort * get_port_(CircuitPort * ports , u32 port_count , u32 id)
{
	
	CircuitPort * ports_end = ports + port_count;
	
	for(CircuitPort * port = ports; port < ports_end; port++)
	{
		if(port -> id == id)
		{
			return(port);
		}
	}
	
	return(0);
}



// Copies one sample into 'buffer' as raw bytes; returns whatever mem_copy() returns.
inline
u32 write_port_line(void * buffer , PortVec vec)
{
	u32 copy_result = mem_copy(buffer , &vec , sizeof(PortVec));
	return(copy_result);
}

// Builds a sample from an integer (range-checked and masked by int_to_cv()) and a time,
// and copies it into 'buffer'.
inline
u32 write_port_line(void * buffer , u32 value , u32 width , i32 time , u32 sign)
{
	CircuitValue packed = int_to_cv(value , width , sign);
	PortVec sample = {packed.v , time};
	
	return(write_port_line(buffer , sample));
}

// Copies one sample out of 'buffer' into 'vec'; returns whatever mem_copy() returns.
inline
u32 read_port_line(void * buffer , PortVec * vec)
{
	u32 copy_result = mem_copy(vec , buffer, sizeof(PortVec));
	return(copy_result);
}



// Asserts that the port's min/max bounds fit the port width and that the last recorded
// sample lies within those bounds. Signed ports are compared as signed numbers.
void check_port_bounds(CircuitPort * port)
{
	
	u32 width = width_of(port -> addr);
	u32 lower = port -> min;
	u32 upper = port -> max;
	u32 is_signed = port -> sign;
	
	CircuitValue last_value = {port -> last_recorded_vec.circuit_value , width};
	u32 sample = cv_to_int(last_value , is_signed);
	
	if(!is_signed)
	{
		assert(lower >= MIN_UNSIGNED_VALUE(width));
		assert(upper <= MAX_UNSIGNED_VALUE(width));
		assert(lower <= sample && sample <= upper);
	}
	
	else
	{
		i32 s_lower = (i32)lower;
		i32 s_upper = (i32)upper;
		i32 s_sample = (i32)sample;
		
		assert(s_lower >= MIN_SIGNED_VALUE(width));
		assert(s_upper <= MAX_SIGNED_VALUE(width));
		assert(s_lower <= s_sample && s_sample <= s_upper);
	}
	
}

// Reads the current sample of an input port and returns it with the port's width.
// The port buffer is treated as an array of PortVec.
//  mode != 0: step to the next sample if there is one (the time stamps are ignored).
//  mode == 0: advance while the following sample lies before 'time', then check that 'time'
//             falls between the selected sample and the one after it.
// The selected sample is stored in port -> last_recorded_vec and checked against the
// port bounds.
// NOTE(review): an empty buffer (no samples) is not handled - confirm callers never reach
// this with a port whose file failed to open.
CircuitValue read_from_port(CircuitPort * port , i32 time , u32 mode)
{
	
	u32 width = width_of(port -> addr);
	CircuitValue result = {};
	
	PortVec * vecs = (PortVec*)port -> mem;
	u32 index = port -> index;
	u32 vec_count = port -> mem_byte_size / sizeof(PortVec);
	PortVec * curr_vec = vecs + index+0;
	PortVec * next_vec = vecs + index+1;
	PortVec * end_vec = vecs + vec_count;
	
	if(mode)
	{
		if(next_vec < end_vec)
		{
			index++;
			curr_vec = vecs + index+0;
			next_vec = vecs + index+1;
			
		}
	}
	
	else
	{
		while(next_vec < end_vec)
		{
			if(next_vec -> time >= time) break;
			
			index++;
			curr_vec = vecs + index+0;
			next_vec = vecs + index+1;
			
		}
		
		assert(curr_vec -> time <= time);
		
		// NOTE: when the last sample is selected there is no following sample;
		// next_vec then points one past the buffer and must not be read.
		assert(next_vec >= end_vec || time <= next_vec -> time);
	}
	
	
	// TODO: how to handle out of bounds samples?
	port -> index = index;
	port -> last_recorded_vec = *curr_vec;
	
	check_port_bounds(port);
	
	result = CircuitValue{curr_vec -> circuit_value , width};
	return(result);
}

// Records 'value' at 'time' on an output port. A sample is only recorded when the value
// differs from the last recorded one and 'time' is later than the last recorded time.
// When the buffer is full it is written to the port file first and then reused from the start.
// The last recorded sample is checked against the port bounds in every case.
void write_to_port(CircuitPort * port , CircuitValue value , i32 time)
{
	
	PortVec previous = port -> last_recorded_vec;
	u32 value_changed = (previous.circuit_value != value.v);
	u32 time_advanced = (time > previous.time);
	
	if(value_changed && time_advanced)
	{
		PortVec * vecs = (PortVec*)port -> mem;
		u32 capacity = port -> mem_byte_size / sizeof(PortVec);
		u32 slot = port -> index;
		
		if(slot == capacity)
		{
			File * file = &port -> file;
			write(file , vecs , slot*sizeof(PortVec) , file -> offset);
			slot = 0;
		}
		
		PortVec sample = {value.v , time};
		write_port_line(vecs + slot , sample);
		
		port -> index = slot + 1;
		port -> last_recorded_vec = sample;
	}
	
	check_port_bounds(port);
	
}

// Writes the buffered samples of every output port (non-zero direction) to its file.
// NOTE: the buffer index is reset after the write, as write_to_port() does when the buffer
// is full, so a second flush or a later write_to_port() does not write the same samples again.
void flush_ports(CircuitPort * ports , u32 port_count)
{
	
	for(u32 port_index = 0; port_index < port_count; port_index++)
	{
		CircuitPort * port = ports + port_index;
		File * file = &port -> file;
		
		PortVec * vecs = (PortVec*)port -> mem;
		u32 index = port -> index;
		
		if(port -> direction)
		{
			write(file , vecs , index*sizeof(PortVec) , file -> offset);
			port -> index = 0;
		}
	}
	
}

// Services every port for the current time step. A port latches when its enable signal is
// non-zero and its clock shows a rising edge (zero in 'prev_state', non-zero in 'curr_state').
//  Output ports (non-zero direction): on a latch, the current port value is recorded to the
//  port and copied to 'next_state'.
//  Input ports: on a latch, the next sample is read from the port; the port value (new or
//  unchanged) is always written to 'next_state'.
// Each latch also updates the port's total_time and trigger_count.
void update_ports(
				  CircuitPort * ports , u32 port_count , 
				  CircuitState * prev_state , CircuitState * curr_state , CircuitState * next_state , i32 time)
{
	
	CircuitPort * ports_end = ports + port_count;
	
	for(CircuitPort * port = ports; port < ports_end; port++)
	{
		CircuitValue prev_clock = read_circuit_value(prev_state , port -> clock);
		CircuitValue curr_clock = read_circuit_value(curr_state , port -> clock);
		CircuitValue enable = read_circuit_value(curr_state , port -> enable);
		CircuitValue port_value = read_circuit_value(curr_state , port -> addr);
		
		u32 rising_edge = (curr_clock.v && !prev_clock.v);
		u32 latch = (rising_edge && enable.v);
		u32 is_output = port -> direction;
		
		if(latch)
		{
			if(is_output)
			{
				write_to_port(port , port_value , time);
			}
			
			else
			{
				port_value = read_from_port(port , time , 1);
			}
			
			port -> total_time = time;
			port -> trigger_count++;
		}
		
		// NOTE: output ports only drive the next state when they latch; input ports always do.
		if(latch || !is_output)
		{
			write_circuit_value(next_state , port -> addr , port_value);
		}
	}
}


//#define push_signal_event(...)

#pragma optimize("" , on)
// Executes one hardware instruction of the simulated circuit for the current time step.
//
// Operands are read from curr_state (and from prev_state where a change or a clock edge has
// to be detected) and results are written into next_state. Most component cases also log
// signal events, power records and fanout records into event_buffer.
//
// instr_index              - index of the instruction to execute inside hw_instrs.
// hw_instrs                - instruction array.
// hw_instr_count           - not referenced by this function body.
// prev_state / curr_state  - circuit memory of the previous and current step (read).
// next_state               - circuit memory of the next step (written).
// ports , port_count       - forwarded to update_ports by UPDATE_IO.
// input_vecs , input_vec_count - forwarded to set_input_vectors by UPDATE_IO.
// mem_info , mem_info_count    - not referenced by this function body.
// event_buffer             - destination of the logged events.
// time                     - current simulation time, attached to every logged record.
//
// An unknown function code hits assert_zero in the default case.
void perform_hw_instruction(
							u32 instr_index , HardwareInstr * hw_instrs , u32 hw_instr_count , 
							CircuitState * prev_state , CircuitState * curr_state , CircuitState * next_state , 
							CircuitPort * ports , u32 port_count , 
							CircuitInputVector * input_vecs , u32 input_vec_count , 
							CircuitMemoryInfo * mem_info , u32 mem_info_count , 
							EventBuffer * event_buffer , i32 time)
{
	
	
	HardwareInstr * instr = hw_instrs + instr_index;
	HardwareInstrInfo * instr_info = get_hw_instr_info(instr -> function);
	u32 input_count = instr_info -> input_count;
	u32 output_count = instr_info -> output_count;
	
	CircuitAddress * input_addrs = instr -> inputs;
	CircuitAddress * output_addrs = instr -> outputs;
	u32 func = instr -> function;
	
	switch(func)
	{
		
		// Applies the input vectors, then services the ports.
		case(UPDATE_IO):
		{
			
			set_input_vectors(
							  input_vecs , input_vec_count , 
							  prev_state , curr_state , next_state , 
							  time , event_buffer
							  );
			
			update_ports(
						 ports , port_count , prev_state , curr_state , next_state , time
						 );
			
			break;
		}
		
		// Output is 1 when the watched byte range is identical in prev_state and curr_state.
		case(COMP_CIRCUIT_STEADY_STATE):
		{
			assert(input_count == 1);
			assert(output_count == 1);
			
			CircuitAddress addr_space = input_addrs[0];
			CircuitAddress Q = output_addrs[0];
			u32 width = width_of(Q);
			CircuitValue out = {0 , width};
			
			// NOTE: circuit addresses are in bits; the comparison below works on bytes.
			addr_space.min /= 8;
			addr_space.max /= 8;
			
			u8 * curr_mem = curr_state -> mem;
			u8 * prev_mem = prev_state -> mem;
			
			u32 curr_mem_size = curr_state -> size;
			u32 prev_mem_size = prev_state -> size;
			
			assert(addr_space.min <= curr_mem_size && addr_space.max <= curr_mem_size);
			assert(addr_space.min <= prev_mem_size && addr_space.max <= prev_mem_size);
			
			u8 * curr_min = curr_mem + addr_space.min;
			u8 * prev_min = prev_mem + addr_space.min;
			u32 size = width_of(addr_space); // NOTE: already in bytes.
			
			if(mem_eql(curr_min , prev_min , size))
			{
				out.v = 1;
				push_circuit_event(STEADY_STATE_EVENT , time , event_buffer);
			}
			
			write_circuit_value(next_state , Q , out);
			break;
			
		}
		
		// Passes A to Q only while the 1-bit select S is non-zero; otherwise Q is not written.
		case(COMP_TRANS_GATE):
		{
			assert(input_count == 2);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress S = input_addrs[1];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			assert(width <= 32);
			assert(width == width_of(Q));
			assert(1 == width_of(S));
			
			CircuitValue q_val = read_circuit_value(curr_state , Q);
			CircuitValue a_val = read_circuit_value(curr_state , A);
			CircuitValue s_val = read_circuit_value(curr_state , S);
			CircuitValue out = {a_val.v , width};
			
			if(s_val.v)
			{
				write_circuit_value(next_state , Q , out);
				push_signal_event(A , Q , a_val , q_val , out , time , func , event_buffer);
				push_signal_event(S , Q , s_val , q_val , out , time , func , event_buffer);
			}
			
			push_power(Q , q_val , out , time , func , event_buffer);
			push_fanout(S , event_buffer);
			
			
			break;
		}
		
		// Q = bitwise complement of A.
		case(COMP_NOT):
		{
			assert(input_count == 1);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			assert(width <= 32);
			assert(width == width_of(Q));
			
			CircuitValue q_val = read_circuit_value(curr_state , Q);
			CircuitValue a_val = read_circuit_value(curr_state , A);
			CircuitValue out = {~a_val.v , width};
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , a_val , q_val , out , time , func , event_buffer);
			
			push_power(Q , q_val , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			
			break;
		}
		
		// Q = A & B (bitwise, equal widths).
		case(COMP_AND2):
		{
			
			assert(input_count == 2);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			assert(width <= 32);
			assert(width == width_of(B));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			CircuitValue lhs = read_circuit_value(curr_state , A);
			CircuitValue rhs = read_circuit_value(curr_state , B);
			CircuitValue out = {lhs.v & rhs.v , width};
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , lhs , q , out , time , func , event_buffer);
			push_signal_event(B , Q , rhs , q , out , time , func , event_buffer);
			
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			break;
		}
		
		
		// Q = A & B & C & D (bitwise, equal widths).
		case(COMP_AND4):
		{
			
			assert(input_count == 4);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress C = input_addrs[2];
			CircuitAddress D = input_addrs[3];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			assert(width <= 32);
			assert(width == width_of(B));
			assert(width == width_of(C));
			assert(width == width_of(D));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			CircuitValue a = read_circuit_value(curr_state , A);
			CircuitValue b = read_circuit_value(curr_state , B);
			CircuitValue c = read_circuit_value(curr_state , C);
			CircuitValue d = read_circuit_value(curr_state , D);
			CircuitValue out = {a.v & b.v & c.v & d.v , width};
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , a , q , out , time , func , event_buffer);
			push_signal_event(B , Q , b , q , out , time , func , event_buffer);
			push_signal_event(C , Q , c , q , out , time , func , event_buffer);
			push_signal_event(D , Q , d , q , out , time , func , event_buffer);
			
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			push_fanout(C , event_buffer);
			push_fanout(D , event_buffer);
			
			break;
		}
		
		// Q = A | B (bitwise, equal widths).
		case(COMP_OR2):
		{
			
			assert(input_count == 2);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			assert(width <= 32);
			assert(width == width_of(B));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			CircuitValue lhs = read_circuit_value(curr_state , A);
			CircuitValue rhs = read_circuit_value(curr_state , B);
			CircuitValue out = {lhs.v | rhs.v , width};
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , lhs , q , out , time , func , event_buffer);
			push_signal_event(B , Q , rhs , q , out , time , func , event_buffer);
			
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			break;
		}
		
		// Q = A | B | C | D (bitwise, equal widths).
		case(COMP_OR4):
		{
			
			assert(input_count == 4);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress C = input_addrs[2];
			CircuitAddress D = input_addrs[3];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			assert(width <= 32);
			assert(width == width_of(B));
			assert(width == width_of(C));
			assert(width == width_of(D));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			CircuitValue a = read_circuit_value(curr_state , A);
			CircuitValue b = read_circuit_value(curr_state , B);
			CircuitValue c = read_circuit_value(curr_state , C);
			CircuitValue d = read_circuit_value(curr_state , D);
			CircuitValue out = {a.v | b.v | c.v | d.v , width};
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , a , q , out , time , func , event_buffer);
			push_signal_event(B , Q , b , q , out , time , func , event_buffer);
			push_signal_event(C , Q , c , q , out , time , func , event_buffer);
			push_signal_event(D , Q , d , q , out , time , func , event_buffer);
			
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			push_fanout(C , event_buffer);
			push_fanout(D , event_buffer);
			
			break;
		}
		
		// Q = (A + B) reduced with the % operator by MAX_SIGNED_VALUE(width).
		case(COMP_ADDER):
		{
			
			assert(input_count == 2);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			assert(width <= 32);
			assert(width == width_of(B));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			CircuitValue lhs = read_circuit_value(curr_state , A);
			CircuitValue rhs = read_circuit_value(curr_state , B);
			
			i32 max = MAX_SIGNED_VALUE(width);
			i32 lhs_val = cv_to_int(lhs , 1);
			i32 rhs_val = cv_to_int(rhs , 1);
			i32 out_val = lhs_val + rhs_val;
			
			out_val = out_val % max;
			CircuitValue out = int_to_cv(out_val , width , 1);
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , lhs , q , out , time , func , event_buffer);
			push_signal_event(B , Q , rhs , q , out , time , func , event_buffer);
			
			// NOTE: the power record compares the previous output q against the new value,
			// exactly like every other component case (it used to be given `out` twice).
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			
			break;
		}
		
		// Q = (A * B) reduced with the % operator by MAX_SIGNED_VALUE(width).
		case(COMP_MULTIPLIER):
		{
			
			assert(input_count == 2);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress Q = output_addrs[0];
			u32 width = width_of(A);
			
			assert(width_of(A) <= 32);
			assert(width == width_of(B));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			
			CircuitValue lhs = read_circuit_value(curr_state , A);
			CircuitValue rhs = read_circuit_value(curr_state , B);
			
			i32 max = MAX_SIGNED_VALUE(width);
			i32 lhs_val = cv_to_int(lhs , 1);
			i32 rhs_val = cv_to_int(rhs , 1);
			i32 out_val = lhs_val * rhs_val;
			
			out_val = out_val % max;
			
			CircuitValue out = int_to_cv(out_val , width , 1);
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , lhs , q , out , time , func , event_buffer);
			push_signal_event(B , Q , rhs , q , out , time , func , event_buffer);
			
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			
			break;
		}
		
		// Q = (A << B) masked with get_bit_bound_mask(0 , width).
		// NOTE(review): the shift amount is not range-checked here - confirm callers keep it valid.
		case(COMP_SHIFT_LEFT):
		{
			
			assert(input_count == 2);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress Q = output_addrs[0];
			u32 width = width_of(A);
			
			assert(width_of(A) <= 32);
			assert(width == width_of(B));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			
			CircuitValue lhs = read_circuit_value(curr_state , A);
			CircuitValue rhs = read_circuit_value(curr_state , B);
			
			u32 bit_mask = get_bit_bound_mask(0 , width);
			i32 lhs_val = cv_to_int(lhs , 1);
			i32 rhs_val = cv_to_int(rhs , 1);
			i32 out_val = (lhs_val << rhs_val) & bit_mask;
			
			CircuitValue out = int_to_cv(out_val , width , 1);
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , lhs , q , out , time , func , event_buffer);
			push_signal_event(B , Q , rhs , q , out , time , func , event_buffer);
			
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			
			break;
		}
		
		// Q = (A >> B) masked with get_bit_bound_mask(0 , width).
		// NOTE(review): the shift amount is not range-checked here - confirm callers keep it valid.
		case(COMP_SHIFT_RIGHT):
		{
			
			assert(input_count == 2);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress Q = output_addrs[0];
			u32 width = width_of(A);
			
			assert(width_of(A) <= 32);
			assert(width == width_of(B));
			assert(width == width_of(Q));
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			
			CircuitValue lhs = read_circuit_value(curr_state , A);
			CircuitValue rhs = read_circuit_value(curr_state , B);
			
			u32 bit_mask = get_bit_bound_mask(0 , width);
			i32 lhs_val = cv_to_int(lhs , 1);
			i32 rhs_val = cv_to_int(rhs , 1);
			i32 out_val = (lhs_val >> rhs_val) & bit_mask;
			
			CircuitValue out = int_to_cv(out_val , width , 1);
			
			write_circuit_value(next_state , Q , out);
			push_signal_event(A , Q , lhs , q , out , time , func , event_buffer);
			push_signal_event(B , Q , rhs , q , out , time , func , event_buffer);
			
			push_power(Q , q , out , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			
			break;
		}
		
		// Q = B when the 1-bit select S is non-zero, otherwise Q = A.
		case(COMP_MUX2):
		{
			assert(input_count == 3);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress S = input_addrs[2];
			CircuitAddress Q = output_addrs[0];
			u32 width = width_of(A);
			
			assert(width <= 32);
			assert(width == width_of(B));
			assert(width == width_of(Q));
			assert(width_of(S) == 1);
			
			CircuitValue q = read_circuit_value(curr_state , Q);
			CircuitValue lhs = read_circuit_value(curr_state , A);
			CircuitValue rhs = read_circuit_value(curr_state , B);
			CircuitValue select = read_circuit_value(curr_state , S);
			
			CircuitAddress in_addr = select.v ? B : A;
			CircuitValue in_value = select.v ? rhs : lhs;
			
			write_circuit_value(next_state , Q , in_value);
			// NOTE: only the selected input is logged as the source of Q.
			push_signal_event(in_addr , Q , in_value , q , in_value , time , func , event_buffer);
			push_signal_event(S , Q , select , q , in_value , time , func , event_buffer);
			
			push_power(Q , q , in_value , time , func , event_buffer);
			push_fanout(S , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			break;
		}
		
		// Two inputs, two outputs: S == 0 routes A -> Q1 and B -> Q2, S != 0 swaps them.
		case(COMP_DEMUX2):
		{
			
			assert(input_count == 3);
			assert(output_count == 2);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress B = input_addrs[1];
			CircuitAddress S = input_addrs[2];
			CircuitAddress Q1 = output_addrs[0];
			CircuitAddress Q2 = output_addrs[1];
			u32 width = width_of(A);
			
			assert(width <= 32);
			assert(width == width_of(Q1));
			assert(width == width_of(Q2));
			assert(width_of(S) == 1);
			
			CircuitValue q1 = read_circuit_value(curr_state , Q1);
			CircuitValue q2 = read_circuit_value(curr_state , Q2);
			CircuitValue A_value = read_circuit_value(curr_state , A);
			CircuitValue B_value = read_circuit_value(curr_state , B);
			CircuitValue select = read_circuit_value(curr_state , S);
			CircuitValue in_value1 = {0 , width};
			CircuitValue in_value2 = {0 , width};
			
			CircuitAddress in_addr1 = {};
			CircuitAddress in_addr2 = {};
			
			if(select.v)
			{
				in_value1 = B_value;
				in_addr1 = B;
				
				in_value2 = A_value;
				in_addr2 = A;
			}
			
			else
			{
				in_value1 = A_value;
				in_addr1 = A;
				
				in_value2 = B_value;
				in_addr2 = B;
			}
			
			write_circuit_value(next_state , Q1 , in_value1);
			write_circuit_value(next_state , Q2 , in_value2);
			
			push_signal_event(in_addr1 , Q1 , in_value1 , q1 , in_value1 , time , func , event_buffer);
			push_signal_event(in_addr2 , Q2 , in_value2 , q2 , in_value2 , time , func , event_buffer);
			
			push_power(Q1 , q1 , in_value1 , time , func , event_buffer);
			push_power(Q2 , q2 , in_value2 , time , func , event_buffer);
			
			push_fanout(S , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(B , event_buffer);
			
			break;
		}
		
		// Q takes A when S is non-zero and clock C goes from zero (prev_state) to non-zero
		// (curr_state); otherwise Q keeps its current value.
		case(COMP_FLIP_FLOP):
		{
			
			assert(input_count == 3);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress C = input_addrs[1];
			CircuitAddress S = input_addrs[2];
			CircuitAddress Q = output_addrs[0];
			
			u32 width = width_of(A);
			
			assert(width <= 32);
			assert(width == width_of(Q));
			assert(width_of(C) == 1);
			assert(width_of(S) == 1);
			
			CircuitValue A_val = read_circuit_value(curr_state , A);
			CircuitValue Q_val = read_circuit_value(curr_state , Q);
			CircuitValue prev_clock = read_circuit_value(prev_state , C);
			CircuitValue curr_clock = read_circuit_value(curr_state , C);
			CircuitValue select = read_circuit_value(curr_state , S);
			u32 edge = !prev_clock.v && curr_clock.v;
			u32 latch = (select.v && edge);
			
			CircuitValue next_value = latch ? A_val : Q_val;
			write_circuit_value(next_state , Q , next_value);
			
			if(latch)
			{
				push_signal_event(A , Q , A_val , Q_val , next_value , time , func , event_buffer);
			}
			
			else
			{
				// NOTE: when holding, Q is logged as its own source.
				push_signal_event(Q , Q , Q_val , Q_val , next_value , time , func , event_buffer);
			}
			
			push_signal_event(C , Q , curr_clock , Q_val , next_value , time , func , event_buffer);
			push_signal_event(S , Q , select , Q_val , next_value , time , func , event_buffer);
			
			push_power(Q , Q_val , next_value , time , func , event_buffer);
			push_fanout(A , event_buffer);
			push_fanout(C , event_buffer);
			push_fanout(S , event_buffer);
			
			break;
		}
		
		// Plain wire: Q = A. Logs a signal event but no power or fanout record.
		case(HW_CONNECTION):
		{
			
			assert(input_count == 1);
			assert(output_count == 1);
			
			CircuitAddress A = input_addrs[0];
			CircuitAddress Q = output_addrs[0];
			u32 width = width_of(A);
			
			assert(width <= 32);
			assert(width == width_of(Q));
			
			CircuitValue value_Q = read_circuit_value(curr_state , Q);
			CircuitValue value_A = read_circuit_value(curr_state , A);
			
			write_circuit_value(next_state , Q , value_A);
			push_signal_event(A , Q , value_A , value_Q , value_A , time , func , event_buffer);
			break;
		}
		
		// Reads the word selected by input 2 out of the word array that starts at input 0,
		// and drives it onto the output.
		case(COMP_MEMORY):
		{
			
			assert(input_count == 3);
			assert(output_count == 1);
			
			CircuitAddress start_addr = input_addrs[0];
			CircuitAddress end_addr = input_addrs[1];
			CircuitAddress load_addr_addr = input_addrs[2];
			CircuitAddress out_addr = output_addrs[0];
			
			// NOTE: the word width is taken from the width of the first word's address range.
			u32 total_bits = end_addr.min - start_addr.min;
			u32 word_width = width_of(start_addr);
			u32 addr_space = total_bits/word_width;
			
			assert(addr_space*word_width == total_bits);
			assert(width_of(load_addr_addr) <= 32);
			assert(word_width <= 32);
			assert(word_width == width_of(end_addr));
			assert(word_width == width_of(out_addr));
			
			CircuitValue load_addr_value = read_circuit_value(curr_state , load_addr_addr);
			i32 offset = cv_to_int(load_addr_value , 1);
			
			CircuitAddress load_addr = {
				start_addr.min + (offset+0)*word_width , 
				start_addr.min + (offset+1)*word_width
			};
			
			// NOTE(review): offset == addr_space is accepted, which addresses the word at
			// end_addr itself - presumably end_addr is the last word; confirm.
			assert(offset <= addr_space);
			
			CircuitValue load_value = read_circuit_value(curr_state , load_addr);
			CircuitValue out_value = read_circuit_value(curr_state , out_addr);
			
			write_circuit_value(next_state , out_addr , load_value);
			push_signal_event(load_addr , out_addr , load_value , out_value , load_value , time , func , event_buffer);
			
			
			break;
		}
		
		// Currently forces the clock output to 0 every step; the toggling logic below is
		// compiled out and the reset input is not read.
		case(COMP_CLOCK):
		{
			
			assert(input_count == 1);
			assert(output_count == 1);
			
			//CircuitAddress reset_addr = input_addrs[0];
			CircuitAddress clock_addr = output_addrs[0];
			
			//CircuitValue reset_value = read_circuit_value(curr_state , reset_addr);
			CircuitValue clock_value = read_circuit_value(curr_state , clock_addr);
			
			clock_value.v = 0;
			
			write_circuit_value(next_state , clock_addr , clock_value);
			
#if 0
			
			if(time)
			{
				u32 phase = time % reset_value.v;
				if(!phase)
				{
					clock_value.v = ~clock_value.v;
					push_signal_event({} , clock_addr , clock_value , time , func , event_buffer);
				}
			}
			
			write_circuit_value(next_state , clock_addr , clock_value);
#endif
			break;
			
		}
		
		
		// Any other function code (including COMP_NO_OP) is treated as a programming error.
		default:
		{
			assert_zero;
		}
	}
}

#pragma optimize("" , PRAGMA_OPTIMISE)

void set_circuit_mem(
					 char * name , CircuitValue value , 
					 CircuitState * state , CircuitMemoryInfo * mem_info , u32 mem_info_size)
{
	
	CircuitMemoryInfo * info = get_circuit_mem_info(mem_info , mem_info_size , name);
	assert(info);
	
	write_circuit_value(state , info -> addr , value);
}


// Keeps the ring of time marks in `buffer` in step with the event, power and fanout logs.
//
// 1. At most one mark whose time is lower than the time of the event at event_start_index is
//    cleared, and the start of the mark ring is advanced by one.
// 2. If there is no current mark, or the current mark belongs to a different time, a new mark
//    is appended at the end of the ring with its start indices set to the logs' current end
//    indices. When the ring's end catches up with its start, the start is advanced.
// 3. The current mark's end indices are refreshed to the logs' current end indices.
void push_time_marks(i32 time , EventBuffer * buffer)
{
	// NOTE: current write positions of the three logs.
	u32 event_pos = buffer -> event_end_index;
	u32 power_pos = buffer -> power_end_index;
	u32 fanout_pos = buffer -> fanout_end_index;
	
	// NOTE: time of the event sitting at the start of the event ring.
	i32 oldest_time = buffer -> events[buffer -> event_start_index].time;
	
	CircuitEventTimeMark * marks = buffer -> time_marks;
	CircuitEventTimeMark * active = buffer -> curr_mark;
	u32 capacity = buffer -> time_mark_capacity;
	u32 first = buffer -> time_mark_start_index;
	u32 last = buffer -> time_mark_end_index;
	
	// NOTE: decided before the retire pass below, because that pass can clear a mark.
	u32 needs_mark = (!active || active -> time != time) ? 1 : 0;
	
	for(u32 cursor = first; cursor != last; )
	{
		CircuitEventTimeMark * candidate = marks + cursor;
		
		if(candidate -> time < oldest_time)
		{
			if(first != last)
			{
				first++;
				if(first >= capacity) first = 0;
			}
			
			*candidate = {};
			break;
		}
		
		cursor++;
		if(cursor == capacity) cursor = 0;
	}
	
	if(needs_mark)
	{
		active = marks + last;
		last++;
		
		active -> time = time;
		active -> event_start_index = event_pos;
		active -> power_start_index = power_pos;
		active -> fanout_start_index = fanout_pos;
		
		if(last >= capacity) last = 0;
		
		// NOTE: the ring is full once the end meets the start, so the start moves on by one.
		if(first == last)
		{
			first++;
			if(first >= capacity) first = 0;
		}
	}
	
	if(active)
	{
		active -> event_end_index = event_pos;
		active -> power_end_index = power_pos;
		active -> fanout_end_index = fanout_pos;
	}
	
	buffer -> curr_mark = active;
	buffer -> time_mark_end_index = last;
	buffer -> time_mark_start_index = first;
}


#if 0
// NOTE: disabled - this function sits inside an #if 0 region and is not compiled.
//
// Rebuilds the time marks from scratch: walks the event ring from event_start_index to
// event_end_index and starts a new mark every time an event's time is greater than the time
// of the mark currently being filled. The number of marks created is stored in
// event_buffer -> time_mark_count.
void create_event_time_marks(EventBuffer * event_buffer)
{
	
	
	CircuitEvent * events = event_buffer -> events;
	u32 event_start_index = event_buffer -> event_start_index;
	u32 event_end_index = event_buffer -> event_end_index;
	u32 event_capacity = event_buffer -> event_capacity;
	u32 event_index = event_start_index;
	
	CircuitEventTimeMark * curr_mark = 0;
	CircuitEventTimeMark * marks = event_buffer -> time_marks;
	u32 mark_capacity = event_buffer -> time_mark_capacity;
	u32 mark_count = 0;
	
	// NOTE: starts below any non-negative event time.
	i32 curr_time = -1;
	
	while(event_index != event_end_index)
	{
		
		CircuitEvent * event = events + event_index;
		i32 evt_time = event -> time;
		
		if(evt_time > curr_time)
		{
			assert(mark_count < mark_capacity);
			curr_mark = marks + mark_count++;
			
			curr_mark -> time = evt_time;
			curr_mark -> event_start_index = event_index;
			curr_time = evt_time;
		}
		
		// NOTE(review): curr_mark is still null here if the first event's time is not
		// greater than -1 - confirm event times are never negative.
		curr_mark -> event_end_index = event_index;
		if(++event_index == event_capacity) event_index = 0;
	}
	
	event_buffer -> time_mark_count = mark_count;
}
#endif

// Placeholder: the whole body is compiled out with #if 0, so calling this function currently
// does nothing.
//
// NOTE: the disabled draft is unfinished - it uses identifiers that are not declared in this
// function (time_index , end_time_index , top_event , stack , stack_size , ...) and contains
// an incomplete declaration (`u32 start_time`).
void check_for_contention(EventBuffer * event_buffer)
{
#if 0	
	CircuitEvent * events = event_buffer -> events;
	u32 start_index = event_buffer -> start_index;
	u32 end_index = event_buffer -> end_index;
	u32 capacity = event_buffer -> capacity;
	u32 index = start_index;
	
	while(time_index != end_time_index)
	{
		
		//EventTimeMark
		u32 start_time
			
			while(index != end_index)
		{
			
			CircuitEvent * event = events + index;
			
			if(event -> type == CIRCUIT_SIGNAL_EVENT)
			{
				if(event -> dest == top_event.source && event -> time+1 == top_event.time)
				{
					assert(stack_size < stack_capacity);
					stack[stack_size++] = *event;
					top_stack_index++;
				}
			}
			
			if(++index == capacity) index = 0;
		}
	}
#endif
}






/*

A B | F
----+--
0 0 | 0
0 1 | 0
1 0 | 1
1 1 | 0


*/

// Returns the width of a test vector expression.
// When the expression's address has a non-zero min or max the width of that address is used;
// an all-zero address falls back to the width of the expression's value.
u32 width_of(TestVectorExpr * expr)
{
	CircuitAddress target = expr -> addr;
	CircuitValue literal = expr -> value;
	
	// NOTE: an address of {0 , 0} is treated as "no address given".
	if(target.min == 0 && target.max == 0)
	{
		return(width_of(literal));
	}
	
	return(width_of(target));
}


// Returns the log with the latest time in the ring range [start_index , end_index) of
// `buffer`, or 0 when the range is empty or holds no log with a non-negative time.
//
// buffer      - ring storage of logs.
// size        - capacity of the ring; the index wraps to 0 when it reaches this value.
// start_index - first slot to inspect.
// end_index   - slot at which the walk stops (not inspected).
//
// Ties on time are resolved in favour of the log visited last.
SignalLog * get_last_log(SignalLog * buffer , u32 size , u32 start_index , u32 end_index)
{
	
	SignalLog * result = 0;
	i32 time = 0;
	u32 index = start_index;
	
	while(index != end_index)
	{
		SignalLog * log = buffer + index; 
		if(log -> time >= time)
		{
			result = log;
			// NOTE: keep the running maximum up to date. Without this every log was compared
			// against 0 and the last non-negative entry visited won, whatever its time.
			time = log -> time;
		}
		
		if(++index >= size) index = 0;
	}
	
	return(result);
}

// Collects the signal events that seed a backtrace: every CIRCUIT_SIGNAL_EVENT stamped with
// `time` whose destination range lies entirely inside `addr` is copied into `stack`.
//
// The event ring is walked backwards, starting at event_end_index and stopping as soon as
// event_start_index is reached (the slot at event_start_index is not inspected).
// Returns the number of events copied; asserts if `stack` would overflow stack_capacity.
//
// NOTE(review): the slot at event_end_index itself is inspected first - confirm that the end
// index refers to a valid event rather than to one past the newest.
u32 get_initial_signal_events(CircuitEvent * stack , u32 stack_capacity , CircuitAddress addr , i32 time , EventBuffer * event_buffer)
{
	CircuitEvent * ring = event_buffer -> events;
	u32 ring_capacity = event_buffer -> event_capacity;
	u32 stop_at = event_buffer -> event_start_index;
	u32 collected = 0;
	
	for(u32 cursor = event_buffer -> event_end_index; cursor != stop_at; )
	{
		CircuitEvent * candidate = ring + cursor;
		
		if(candidate -> type == CIRCUIT_SIGNAL_EVENT && candidate -> time == time)
		{
			u32 dest_lo = candidate -> dest.min;
			u32 dest_hi = candidate -> dest.max;
			
			// NOTE: both ends of the destination must fall within [addr.min , addr.max].
			u32 lo_inside = (addr.min <= dest_lo && dest_lo <= addr.max);
			u32 hi_inside = (addr.min <= dest_hi && dest_hi <= addr.max);
			
			if(lo_inside && hi_inside)
			{
				assert(collected < stack_capacity);
				stack[collected] = *candidate;
				collected++;
			}
		}
		
		// NOTE: step backwards, wrapping from slot 0 to the last slot of the ring.
		cursor = (cursor == 0) ? ring_capacity-1 : cursor-1;
	}
	
	return(collected);
}


// Looks for a chain of signal events that follows `path` hop by hop, ending at `time`.
//
// path         - addresses of the path, from its first address to its last.
// length       - number of addresses in `path`.
// time         - time expected for the final hop; each earlier hop is expected one step earlier.
// event_buffer - event ring that is searched backwards, starting at event_end_index.
// trace_ptr    - receives the array of matched events (one per hop, in path order), or 0.
// arena        - the trace array is pushed here and popped again when the search fails.
//
// Returns the number of events in the trace (length-1) when every hop was found, 0 otherwise.
// A path with fewer than two addresses has no hops: nothing is allocated and 0 is returned.
u32 find_signal_path(
					 CircuitAddress * path , u32 length , i32 time , EventBuffer * event_buffer ,
					 CircuitEvent ** trace_ptr , Mem * arena)
{
	
	// NOTE: guards the unsigned `length-1` below, which would wrap around for length == 0.
	if(length < 2)
	{
		*trace_ptr = 0;
		return(0);
	}
	
	// NOTE: the length of the backtrace excludes the end address because only the destination of a signal event
	// is considered, and therefore, the first signal to go into the backtrace (from the end) is the penultimate address.
	u32 trace_size = length-1;
	CircuitEvent * trace = mem_push(arena , CircuitEvent , sizeof(CircuitEvent)*trace_size);
	
	CircuitEvent * events = event_buffer -> events;
	u32 start_index = event_buffer -> event_start_index;
	u32 end_index = event_buffer -> event_end_index;
	u32 capacity = event_buffer -> event_capacity;
	u32 index = end_index;
	u32 path_index = trace_size;
	
	// NOTE: path_index counts the hops still missing; the search stops once it reaches 0.
	while(index != start_index && path_index)
	{
		
		CircuitEvent * event = events + index;
		if(event -> type == CIRCUIT_SIGNAL_EVENT)
		{
			if( event -> dest == path[path_index] && 
			   event -> source == path[path_index-1] && 
			   event -> time == time)
			{
				trace[--path_index] = *event;
				// NOTE: the hop before this one must have happened one time step earlier.
				time--;
			}
		}
		
		if(index-- == 0) index = capacity-1;
	}
	
	if(path_index)
	{
		// NOTE: incomplete chain - give the memory back and report "no trace".
		mem_pop(arena , trace);
		trace_size = 0;
		trace = 0;
	}
	
	*trace_ptr = trace;
	return(trace_size);
}


#if 0
// NOTE: disabled - this function sits inside an #if 0 region and is not compiled.
//
// Draft of a backward search from `end` (at `time`) towards `start` over the logged signal
// events. A predecessor must have happened exactly one time step before the event it feeds
// (event -> time+1 == top_event.time). The first chain whose source is in range of `start`
// is copied into an array pushed on `arena` and returned through trace_ptr / trace_size_ptr.
//
// NOTE: the draft reads event_buffer -> start_index / end_index / capacity and calls
// push_array, whereas the compiled functions in this file use event_start_index /
// event_end_index / event_capacity and mem_push.
u32 find_critical_path(
					   CircuitAddress start , CircuitAddress end , i32 time , EventBuffer * event_buffer ,
					   CircuitEvent ** trace_ptr , u32 * trace_size_ptr , Mem * arena)
{
	
	CircuitEvent * trace = 0;
	u32 trace_size = 0;
	
	// NOTE: trace_stack holds, per depth of the current chain, the index into `stack` of the
	// event chosen at that depth. trail_ends is written but never read in this function.
	u32 trace_stack[16] = {};
	u32 trail_ends[16] = {};
	u32 trace_stack_capacity = array_size(trace_stack);
	u32 trace_stack_size = 0;
	
	CircuitEvent stack[64] = {};
	u32 stack_capacity = array_size(stack);
	u32 stack_size = get_initial_signal_events(stack , stack_capacity , end , time , event_buffer);
	
	CircuitEvent * events = event_buffer -> events;
	u32 start_index = event_buffer -> start_index;
	u32 end_index = event_buffer -> end_index;
	u32 capacity = event_buffer -> capacity;
	
	while(stack_size)
	{
		
		u32 top_stack_index = stack_size-1;
		u32 top_trace_index = trace_stack_size++;
		CircuitEvent top_event = stack[top_stack_index];
		u32 index = end_index;
		
		assert(top_event.type == CIRCUIT_SIGNAL_EVENT);
		assert(trace_stack_size < trace_stack_capacity);
		trace_stack[top_trace_index] = top_stack_index;
		trail_ends[top_trace_index] = 1;
		
		if(in_range(top_event.source , start) && trace_stack_size > trace_size)
		{
			
			if(trace_stack_size)
			{
				
				trace_size = trace_stack_size;
				trace = push_array(arena , CircuitEvent , trace_size);
				
				// NOTE: the chain was built from `end` backwards, so it is reversed on copy.
				for(u32 trace_index = 0; trace_index < trace_size; trace_index++)
				{
					trace[trace_size-(trace_index+1)] = stack[trace_stack[trace_index]];
				}
			}
			
			break;
		}
		
		// NOTE: push every event that fed the source of the top event one step earlier.
		while(index != start_index)
		{
			
			CircuitEvent * event = events + index;
			
			if(event -> type == CIRCUIT_SIGNAL_EVENT)
			{
				if(event -> dest == top_event.source && event -> time+1 == top_event.time)
				{
					assert(stack_size < stack_capacity);
					stack[stack_size++] = *event;
					top_stack_index++;
				}
			}
			
			if(index-- == 0) index = capacity-1;
		}
		
		// NOTE: top_stack_index is unchanged only when nothing was pushed above; in that case
		// the entries whose recorded stack index matches are popped.
		while(top_stack_index == trace_stack[top_trace_index])
		{
			
			stack_size = top_stack_index;
			trace_stack_size = top_trace_index;
			
			if(!top_stack_index || !top_trace_index)
			{
				break;
			}
			
			stack[top_stack_index--] = {};
			trace_stack[top_trace_index--] = {};
		}
	}
	
	*trace_ptr = trace;
	*trace_size_ptr = trace_size;
	return(trace_size);
}
#endif


// Returns 1 when one of the first `size` events in `stack` has `addr` as its source,
// 0 otherwise (including when `size` is 0).
inline
u32 in_event_stack(CircuitAddress addr , CircuitEvent * stack , u32 size)
{
	u32 slot = 0;
	
	while(slot < size)
	{
		// NOTE: the first match is enough, the rest of the stack is not inspected.
		if(stack[slot].source == addr)
		{
			return(1);
		}
		
		slot++;
	}
	
	return(0);
}


// Searches the logged signal events backwards from `end` (at time_at_end) for a chain of
// events that leads back to `start`.
//
// start , end    - addresses the chain has to connect.
// time_at_end    - time of the events at `end` that seed the search.
// event_buffer   - event ring that is scanned backwards from event_end_index.
// trace_ptr      - receives the chain (ordered from the `start` side to the `end` side), or 0.
// trace_size_ptr - receives the number of events in the chain, or 0.
// arena          - the chain array is pushed here; it is not popped by this function.
//
// Returns 1 when a chain was found, 0 otherwise.
u32 find_signal_trace(
					  CircuitAddress start , CircuitAddress end , i32 time_at_end , EventBuffer * event_buffer ,
					  CircuitEvent ** trace_ptr , u32 * trace_size_ptr , Mem * arena)
{
	
	
	CircuitEvent * trace = 0;
	u32 trace_size = 0;
	
	// NOTE: trace_stack holds, per depth of the current chain, the index into `stack` of the
	// event chosen at that depth. trail_ends is written but never read in this function.
	u32 trace_stack[32] = {};
	u32 trail_ends[32] = {};
	u32 trace_stack_capacity = array_size(trace_stack);
	u32 trace_stack_size = 0;
	
	// NOTE: `stack` holds the candidate events still to be explored, seeded with the events
	// that landed inside `end` at time_at_end.
	CircuitEvent stack[64] = {};
	u32 stack_capacity = array_size(stack);
	u32 stack_size = get_initial_signal_events(stack , stack_capacity , end , time_at_end , event_buffer);
	
	CircuitEvent * events = event_buffer -> events;
	u32 start_index = event_buffer -> event_start_index;
	u32 end_index = event_buffer -> event_end_index;
	u32 capacity = event_buffer -> event_capacity;
	
	//i32 time = time_at_end;
	
	while(stack_size)
	{
		
		
		// NOTE: extend the current chain with the event on top of the candidate stack.
		u32 top_stack_index = stack_size-1;
		u32 top_trace_index = trace_stack_size++;
		CircuitEvent top_event = stack[top_stack_index];
		u32 index = end_index;
		
		
		assert(top_event.type == CIRCUIT_SIGNAL_EVENT);
		assert(trace_stack_size < trace_stack_capacity);
		trace_stack[top_trace_index] = top_stack_index;
		trail_ends[top_trace_index] = 1;
		
		// NOTE: the chain is complete once the top event originates inside `start`.
		if(in_range(top_event.source , start))
		{
			
			if(trace_stack_size)
			{
				trace_size = trace_stack_size;
				trace = mem_push(arena , CircuitEvent , sizeof(CircuitEvent)*trace_size);
				
				// NOTE: the chain was built from `end` backwards, so it is reversed on copy.
				for(u32 trace_index = 0; trace_index < trace_size; trace_index++)
				{
					trace[trace_size-(trace_index+1)] = stack[trace_stack[trace_index]];
				}
			}
			
			break;
		}
		
		// NOTE: push every logged event that drove the source of the top event at the same
		// or an earlier time, skipping sources that already appear on the candidate stack.
		while(index != start_index)
		{
			
			CircuitEvent * event = events + index;
			
			if(event -> type == CIRCUIT_SIGNAL_EVENT)
			{
				if(event -> dest == top_event.source && event -> time <= top_event.time)
				{
					if(!in_event_stack(event -> source , stack , stack_size))
					{
						assert(stack_size < stack_capacity);
						stack[stack_size++] = *event;
						top_stack_index++;
					}
				}
			}
			
			if(index-- == 0) index = capacity-1;
		}
		
		// NOTE: top_stack_index is unchanged only when nothing was pushed above (dead end);
		// in that case the entries whose recorded stack index matches are popped.
		while(top_stack_index == trace_stack[top_trace_index])
		{
			
			stack_size = top_stack_index;
			trace_stack_size = top_trace_index;
			
			if(!top_stack_index || !top_trace_index)
			{
				break;
			}
			
			stack[top_stack_index--] = {};
			trace_stack[top_trace_index--] = {};
		}
	}
	
	*trace_ptr = trace;
	*trace_size_ptr = trace_size;
	return(trace_size && trace);
}

// Convenience wrapper around find_signal_trace for callers that only need a yes/no answer.
// Returns what find_signal_trace returns; the trace itself and its size are discarded.
// NOTE: a trace that find_signal_trace pushes on `arena` is left there.
u32 signal_trace_exists(
						CircuitAddress start , CircuitAddress end , i32 time_at_end , EventBuffer * event_buffer , Mem * arena)
{
	CircuitEvent * ignored_trace = 0;
	u32 ignored_size = 0;
	
	return(find_signal_trace(
							 start , end , time_at_end , event_buffer ,
							 &ignored_trace , &ignored_size , arena
							 ));
}


// Fills in the SignalTraceResult slots referenced by each SignalTrace.
//
// For every trace the two slots named by start_result_id / end_result_id
// (both offset by MIN_TRACE_ID) are cleared, then find_signal_path() is asked
// for the events along the trace's path. When it returns events, the time of
// the first event goes into the start slot and the time of the last event into
// the end slot, and both are marked available. Otherwise the slots stay zeroed.
void calculate_trace_times(
						   SignalTrace * traces , u32 trace_count , 
						   SignalTraceResult * results , u32 result_count , 
						   i32 time , EventBuffer * event_buffer , Mem * scratch_arena)
{
	
	for(u32 i = 0; i < trace_count; i++)
	{
		
		SignalTrace * curr = traces + i;
		
		u32 first_slot = curr -> start_result_id - MIN_TRACE_ID;
		u32 last_slot = curr -> end_result_id - MIN_TRACE_ID;
		
		assert(first_slot < result_count);
		assert(last_slot < result_count);
		
		SignalTraceResult * first_result = results + first_slot;
		SignalTraceResult * last_result = results + last_slot;
		
		// NOTE: reset before searching so stale values never survive a failed lookup.
		*first_result = {};
		*last_result = {};
		
		CircuitEvent * path_events = 0;
		u32 path_event_count = find_signal_path(
												curr -> path , curr -> path_length , time , event_buffer , 
												&path_events , scratch_arena
												);
		
		if(!path_events || !path_event_count)
		{
			continue;
		}
		
		first_result -> time = path_events[0].time;
		first_result -> available = 1;
		
		last_result -> time = path_events[path_event_count-1].time;
		last_result -> available = 1;
	}
}



// Produces the value of addr as seen at target_time.
//
// Starts from the value currently stored in curr_state and walks the event
// ring buffer backwards from event_end_index until event_start_index is
// reached (the event at event_start_index itself is not examined). Every
// CIRCUIT_SIGNAL_EVENT whose destination overlaps addr and whose time lies in
// [target_time , curr_time] has the overlapping bits of its result_value
// written over the working value. After each applied event the upper bound of
// the time window shrinks to that event's time, and the walk stops early once
// an applied event's time is <= target_time.
//
// NOTE(review): `found` starts at 1 and is only ever set to 1, so the final
// assert cannot fire as written - confirm whether it was meant to start at 0.
CircuitValue get_cv_at(
					   CircuitAddress addr , 
					   i32 target_time , i32 curr_time , EventBuffer * event_buffer , 
					   CircuitState * curr_state)
{
	
	CircuitEvent * log = event_buffer -> events;
	u32 ring_size = event_buffer -> event_capacity;
	u32 scan_stop = event_buffer -> event_start_index;
	u32 scan_from = event_buffer -> event_end_index;
	
	CircuitValue value = read_circuit_value(curr_state , addr);
	u32 found = 1;
	
	// NOTE: the step expression wraps from slot 0 back to the last slot of the ring.
	for(u32 i = scan_from; i != scan_stop; i = (i == 0) ? ring_size-1 : i-1)
	{
		
		CircuitEvent * ev = log + i;
		if(ev -> type != CIRCUIT_SIGNAL_EVENT)
		{
			continue;
		}
		
		CircuitAddress written = ev -> dest;
		i32 when = ev -> time;
		CircuitValue written_value = ev -> result_value;
		
		// Bit range shared by the event's destination and the requested address.
		u32 lo = max(written.min , addr.min);
		u32 hi = min(written.max , addr.max);
		
		u32 overlaps = (lo < hi);
		u32 in_window = (target_time <= when && when <= curr_time);
		
		if(!overlaps || !in_window)
		{
			continue;
		}
		
		// Same overlap expressed relative to each side's own base bit.
		CircuitAddress in_value = {lo - addr.min , hi - addr.min};
		CircuitAddress in_event = {lo - written.min , hi - written.min};
		
		CircuitValue slice = read_circuit_memory_((u8*)&written_value.v , in_event);
		write_circuit_memory_((u8*)&value.v , in_value , slice);
		
		if(when <= target_time && target_time <= curr_time)
		{
			found = 1;
			break;
		}
		
		curr_time = when;
	}
	
	assert(found);
	return(value);
}


// Integer flavour of get_cv_at(): fetches the value of addr at target_time
// and converts it with cv_to_int(), forwarding `sign` to that conversion.
u32 get_cv_to_int_at(CircuitAddress addr , u32 sign , 
					 i32 target_time , i32 curr_time , EventBuffer * event_buffer , CircuitState * curr_state)
{
	
	return(cv_to_int(
					 get_cv_at(addr , target_time , curr_time , event_buffer , curr_state) , 
					 sign
					 ));
}


// Returns 1 when the value of addr at target_time (as produced by get_cv_at())
// matches expected_value in both bits and width, 0 otherwise.
// A fetched value with zero width never compares equal.
u32 cv_eqls_at(
			   CircuitValue expected_value , CircuitAddress addr , 
			   i32 target_time , i32 curr_time , EventBuffer * event_buffer , 
			   CircuitState * curr_state)
{
	
	CircuitValue actual = get_cv_at(addr , target_time , curr_time , event_buffer , curr_state);
	
	if(!actual.width)
	{
		return(0);
	}
	
	u32 same_bits = (actual.v == expected_value.v);
	u32 same_width = (actual.width == expected_value.width);
	
	return(same_bits && same_width);
}






// Searches the event ring buffer backwards, starting at event_end_index and
// stopping when event_start_index is reached (that slot is not examined), for
// events whose type and time both match. `count` selects which match to
// return: 0 is the first match met on the backwards walk, 1 the second, etc.
// Returns 0 when there are not enough matches.
CircuitEvent * find_signal_event_at(EventBuffer * event_buffer , u32 type , i32 time , u32 count)
{
	
	CircuitEvent * log = event_buffer -> events;
	u32 ring_size = event_buffer -> event_capacity;
	u32 scan_stop = event_buffer -> event_start_index;
	
	CircuitEvent * match = 0;
	u32 matches_seen = 0;
	
	// NOTE: the step expression wraps from slot 0 back to the last slot of the ring.
	for(u32 i = event_buffer -> event_end_index; i != scan_stop; i = (i == 0) ? ring_size-1 : i-1)
	{
		
		CircuitEvent * candidate = log + i;
		
		if(candidate -> type != type || candidate -> time != time)
		{
			continue;
		}
		
		if(matches_seen++ == count)
		{
			match = candidate;
			break;
		}
	}
	
	return(match);
	
}


// Searches the event ring buffer backwards, starting at event_end_index and
// stopping when event_start_index is reached (that slot is not examined), for
// events of the given type regardless of their time. `count` selects which
// match to return: 0 is the first match met on the backwards walk, 1 the
// second, etc. Returns 0 when there are not enough matches.
CircuitEvent * find_signal_event(EventBuffer * event_buffer , u32 type , u32 count)
{
	
	CircuitEvent * log = event_buffer -> events;
	u32 ring_size = event_buffer -> event_capacity;
	u32 scan_stop = event_buffer -> event_start_index;
	
	CircuitEvent * match = 0;
	u32 matches_seen = 0;
	
	// NOTE: the step expression wraps from slot 0 back to the last slot of the ring.
	for(u32 i = event_buffer -> event_end_index; i != scan_stop; i = (i == 0) ? ring_size-1 : i-1)
	{
		
		CircuitEvent * candidate = log + i;
		
		if(candidate -> type != type)
		{
			continue;
		}
		
		if(matches_seen++ == count)
		{
			match = candidate;
			break;
		}
	}
	
	return(match);
	
}

// Recursively evaluates one node of a test-vector expression tree and returns
// a freshly pushed EvalExprResult describing it.
//
// expr          : node to evaluate; its lhs / rhs children (when present) are
//                 evaluated first, each with the same `time`.
// time          : current simulation time. Used as the signal time when
//                 time_spec.id is 0 and as the curr_time argument of every
//                 get_cv_at() call.
// event_buffer  : event log searched for time-spec events and past values.
// traces        : trace results, indexed by (time_spec.id - MIN_TRACE_ID).
// curr_state    : circuit state handed to get_cv_at().
// result_arena  : receives the result nodes; children are pushed before
//                 their parent.
// scratch_arena : handed to find_signal_trace() for EXPR_MOVING_BACKTRACE.
//
// The signal time is resolved from expr -> time_spec (current time, a trace
// result, or the time of the time_spec.instance-th event of type time_spec.id)
// plus time_spec.const_delay. `available` is cleared when the referenced trace
// or event is missing or when a child is unavailable; in that case the
// operator is not applied and the result value is simply expr -> value.
EvalExprResult * eval_expr(
						   TestVectorExpr * expr , i32 time , 
						   EventBuffer * event_buffer , SignalTraceResult * traces , CircuitState * curr_state ,
						   Mem * result_arena , Mem * scratch_arena)
{
	
	u32 op_code = expr -> op_code;
	TestVectorExpr * lhs_expr = expr -> lhs;
	TestVectorExpr * rhs_expr = expr -> rhs;
	
	u32 available = 1;
	CircuitValue value_result = expr -> value;
	CircuitAddress addr_result = expr -> addr;
	
	EvalExprResult * lhs_result = 0;
	EvalExprResult * rhs_result = 0;
	
	CircuitValue lhs_val = {};
	CircuitValue rhs_val = {};
	
	TimeSpec time_spec = expr -> time_spec;
	i32 signal_time = 0;
	
	// Resolve the base time this node refers to.
	if(time_spec.id == 0)
	{
		signal_time = time;
	}
	
	else if(MIN_TRACE_ID <= time_spec.id && time_spec.id <= MAX_TRACE_ID)
	{
		SignalTraceResult trace = traces[time_spec.id - MIN_TRACE_ID];
		signal_time = trace.time;
		available &= trace.available;
	}
	
	else
	{
		// NOTE: any other id is treated as an event type to look up in the log.
		CircuitEvent * event = find_signal_event(event_buffer , time_spec.id , time_spec.instance);
		available &= (event != 0);
		
		if(event)
		{
			signal_time = event -> time;
		}	
	}
	
	signal_time += time_spec.const_delay;
	
	if(lhs_expr)
	{
		
		lhs_result = eval_expr(
							   lhs_expr , time , 
							   event_buffer , traces , curr_state , 
							   result_arena , scratch_arena
							   );
		
		lhs_val = lhs_result -> value;
		available &= lhs_result -> available;
	}
	
	if(rhs_expr)
	{
		
		rhs_result = eval_expr(
							   rhs_expr , time , 
							   event_buffer , traces , curr_state , 
							   result_arena , scratch_arena
							   );
		
		rhs_val = rhs_result -> value;
		available &= rhs_result -> available;
	}
	
	if(available)
	{
		switch(op_code)
		{
			case(EXPR_OPERAND):
			{
				
				assert(!lhs_expr);
				assert(!rhs_expr);
				
				CircuitAddress addr = expr -> addr;
				CircuitValue value = expr -> value;
				
				// NOTE: an all-zero address means the operand is a literal; otherwise
				// the value is fetched from the circuit at the resolved signal time.
				if(addr.min || addr.max)
				{
					value = get_cv_at(
									  addr , signal_time , time , 
									  event_buffer , curr_state
									  );
					
					// NOTE(review): available is already non-zero inside this block, so
					// OR-ing can never clear it - confirm whether `&=` on
					// (value.width != 0) was intended.
					available |= value.width;
					addr_result = addr;
				}
				
				value_result = value;
				break;
			}
			
			case(EXPR_MEMORY):
			{
				
				// NOTE: rhs supplies the address of word 0, lhs the word index.
				// rhs_expr is dereferenced below, so guard it like the other
				// binary operators do.
				assert(rhs_expr);
				
				CircuitAddress rhs_addr = rhs_expr -> addr;
				u32 word_width = width_of(rhs_addr);
				
				CircuitAddress load_addr = {
					rhs_addr.min + word_width*lhs_val.v , 
					rhs_addr.min + word_width*(lhs_val.v + 1)
				};
				
				value_result = get_cv_at(
										 load_addr , 
										 signal_time , time , event_buffer , 
										 curr_state
										 );
				
				// NOTE(review): same no-op OR as in EXPR_OPERAND - confirm intent.
				available |= value_result.width;
				break;
			}
			
			case(EXPR_POS_EDGE):
			{
				
				// True when the signal was zero one time unit before signal_time
				// and is non-zero at signal_time.
				CircuitAddress addr = expr -> addr;
				CircuitValue before_value = {};
				CircuitValue after_value = {};
				
				before_value = get_cv_at(
										 addr , 
										 signal_time-1 , time , 
										 event_buffer , curr_state
										 );
				
				after_value = get_cv_at(
										addr , 
										signal_time , time , 
										event_buffer , curr_state
										);
				
				// NOTE(review): same no-op OR as in EXPR_OPERAND - confirm intent.
				available |= before_value.width && after_value.width;
				if(!before_value.v && after_value.v)
				{
					value_result.v = 1;
					value_result.width = 1;
				}
				
				break;
			}
			
			case(EXPR_PASS):
			{
				value_result.v = 1;
				value_result.width = 1;
				break;
			}
			
			case(EXPR_EQL):
			{
				
				assert(lhs_expr);
				assert(rhs_expr);
				assert(lhs_val.width == rhs_val.width);
				
				if(lhs_val.v == rhs_val.v)
				{
					value_result.v = 1;
					value_result.width = 1;
				}
				
				break;
			}
			
			case(EXPR_ADD):
			{
				
				assert(lhs_expr);
				assert(rhs_expr);
				assert(lhs_val.width == rhs_val.width);
				
				// Signed add; the result keeps the operand width.
				i32 lhs_v = cv_to_int(lhs_val , 1);
				i32 rhs_v = cv_to_int(rhs_val , 1);
				i32 res = lhs_v + rhs_v;
				value_result = int_to_cv(res , lhs_val.width ,  1);
				
				break;
			}
			
			case(EXPR_MULT):
			{
				
				assert(lhs_expr);
				assert(rhs_expr);
				assert(lhs_val.width == rhs_val.width);
				
				// Signed multiply; the result keeps the operand width.
				i32 lhs_v = cv_to_int(lhs_val , 1);
				i32 rhs_v = cv_to_int(rhs_val , 1);
				i32 res = lhs_v * rhs_v;
				value_result = int_to_cv(res , lhs_val.width ,  1);
				
				break;
			}
			
			case(EXPR_MOVING_BACKTRACE):
			{
				
				assert(lhs_expr);
				assert(rhs_expr);
				
				CircuitAddress start = lhs_expr -> addr;
				CircuitAddress end = rhs_expr -> addr;
				
				CircuitEvent * trace = 0;
				u32 trace_size = 0;
				
				// On success the value is the number of events in the trace.
				if(find_signal_trace(
									 start , end , time , event_buffer ,
									 &trace , &trace_size , scratch_arena))
				{
					value_result.v = trace_size;
					value_result.width = 32;
				}
				
				break;
			}
			
			default:
			{
				assert_zero;
			}
		}
	}
	
	// NOTE: pushed after the children so a parent always follows its operands
	// in result_arena.
	EvalExprResult * result = mem_push(result_arena , EvalExprResult , sizeof(EvalExprResult));
	
	result -> time = signal_time;
	result -> available = available;
	result -> lhs_result = lhs_result;
	result -> rhs_result = rhs_result;
	result -> value = value_result;
	result -> addr = addr_result;
	result -> op_code = op_code;
	
	return(result);
}


// Evaluates a list of condition expressions and AND-combines their outcomes.
//
// tests / test_count : expressions to evaluate, in order.
// time               : current simulation time, forwarded to eval_expr().
// available_ptr      : receives 1 only if every expression was available.
// results            : receives one EvalExprResult pointer per expression.
// test_result_arena  : arena the EvalExprResult nodes are pushed on.
// scratch_arena      : forwarded to eval_expr().
//
// Returns 1 when every expression evaluated to a non-zero value, otherwise 0.
// An empty list yields 1 and reports available.
u32 test_conditions(
					TestVectorExpr ** tests , u32 test_count , i32 time , u32 * available_ptr , 
					EventBuffer * event_buffer , SignalTraceResult * traces , CircuitState * curr_state ,
					EvalExprResult ** results , Mem * test_result_arena , Mem * scratch_arena)
{
	
	u32 available = 1;
	u32 result = 1;
	for(u32 test_index = 0; test_index < test_count; test_index++)
	{
		
		TestVectorExpr * expr = tests[test_index];
		EvalExprResult * eval_res = eval_expr(
											  expr , time , 
											  event_buffer , traces , curr_state , 
											  test_result_arena , scratch_arena
											  );
		
		// NOTE: compare against zero instead of masking with the raw value.
		// A plain `result &= value.v` only looks at bit 0, so a non-boolean
		// result (e.g. the trace length produced by EXPR_MOVING_BACKTRACE)
		// would count as false whenever it happens to be even.
		result &= (eval_res -> value.v != 0);
		results[test_index] = eval_res;
		available &= eval_res -> available;
	}
	
	*available_ptr = available;
	
	return(result);
}

// Runs every test vector against the circuit at the given time and appends
// one TestVectorResult per vector to `results`.
//
// The trace result table is refreshed first via calculate_trace_times().
// For each vector the antecedent and consequent expression lists are
// evaluated with test_conditions(); the vector is
//   active : when the antecedent holds and every antecedent and consequent
//            expression was available,
//   passed : when the antecedent does not hold or the consequent holds.
// The EvalExprResult nodes produced for a vector are recorded as the range
// [eval_results , eval_results + eval_result_count) of test_result_arena.
//
// result_count_ptr is read for the first free slot and updated on return;
// result_capacity bounds the number of slots (checked with assert).
void perform_test_vectors(
						  TestVector * vecs , u32 vec_count , i32 time ,
						  EventBuffer * event_buffer , CircuitState * curr_state , 
						  SignalTrace * traces , u32 trace_count , SignalTraceResult * trace_results , u32 trace_result_count , 
						  TestVectorResult * results , u32 * result_count_ptr , u32 result_capacity , 
						  Mem * test_result_arena , Mem * scratch_arena)
{
	
	calculate_trace_times(
						  traces , trace_count , 
						  trace_results , trace_result_count , 
						  time , event_buffer , scratch_arena
						  );
	
	u32 result_count = *result_count_ptr;
	for(u32 test_index = 0; test_index < vec_count ; test_index++)
	{
		
		TestVector * curr_test = vecs + test_index;
		
		TestVectorExpr ** conseq = curr_test -> conseq;
		TestVectorExpr ** antec = curr_test -> antec;
		
		u32 antec_count = curr_test -> antec_count;
		u32 conseq_count = curr_test -> conseq_count;
		
		assert(result_count < result_capacity);
		TestVectorResult * curr_result = results + result_count++;
		
		// NOTE: remember where this vector's result nodes start in the arena.
		EvalExprResult * eval_results = mem_end(*test_result_arena , EvalExprResult);
		EvalExprResult ** antec_results = curr_result -> antec_results;
		EvalExprResult ** conseq_results = curr_result -> conseq_results;
		
		// NOTE: kept separate so the consequent pass does not overwrite the
		// availability reported for the antecedent.
		u32 antec_available = 0;
		u32 conseq_available = 0;
		
		u32 antec_result = test_conditions(
										   antec , antec_count , time , &antec_available , 
										   event_buffer , trace_results , curr_state , 
										   antec_results , test_result_arena , scratch_arena
										   );
		
		u32 conseq_result = test_conditions(
											conseq , conseq_count , time , &conseq_available , 
											event_buffer , trace_results , curr_state , 
											conseq_results , test_result_arena , scratch_arena
											);
		
		curr_result -> active = antec_result && antec_available && conseq_available;
		curr_result -> passed = !antec_result || conseq_result;
		
		curr_result -> eval_results = eval_results;
		curr_result -> eval_result_count = mem_end(*test_result_arena , EvalExprResult) - eval_results;
		
		curr_result -> conseq_count = conseq_count;
		curr_result -> antec_count = antec_count;
	}
	
	*result_count_ptr = result_count;
}











// Scans instrs[start_index .. end_index) and pushes `addr` onto `stack` once
// for every instruction whose outputs contain it (as decided by find_addr()).
// Returns the number of entries pushed; overflowing stack_capacity asserts.
// NOTE(review): the pushed element is always `addr` itself, not anything
// taken from the matching instruction - confirm this is what callers expect.
u32 get_initial_circuit_elements(
								 CircuitAddress * stack , u32 stack_capacity , CircuitAddress addr , 
								 HardwareInstr * instrs , u32 start_index , u32 end_index)
{
	
	u32 pushed = 0;
	u32 instr_index = start_index;
	
	while(instr_index < end_index)
	{
		
		HardwareInstr instr = instrs[instr_index++];
		u32 output_count = get_hw_instr_info(instr.function) -> output_count;
		
		if(!find_addr(addr , instr.outputs , output_count))
		{
			continue;
		}
		
		assert(pushed < stack_capacity);
		stack[pushed++] = addr;
	}
	
	return(pushed);
}

// Searches backwards through the instruction list, from `end` towards
// `start`, for the longest chain of addresses (counted in addresses on the
// chain) linking the two.
//
// The search is an iterative depth-first walk: an address is expanded by
// finding every instruction in instrs[start_index .. end_index) that lists it
// among its outputs and pushing all of that instruction's inputs.
//
// trace_ptr : receives an array of 64 CircuitAddress pushed on `arena`. Its
//             first `return value` entries hold the best chain found, stored
//             in reverse visiting order so the last entry is `end`.
// Returns the length of that chain, or 0 when no visited address satisfied
// in_range(addr , start).
//
// NOTE: capacities are fixed (work stack 64, path depth 16, trace 64) and
// are enforced with asserts only.
// NOTE(review): nothing here remembers visited addresses, so a feedback loop
// in the instruction list would presumably keep pushing until one of the
// capacity asserts fires - confirm callers only pass acyclic ranges.
u32 find_critical_path(
					   CircuitAddress start , CircuitAddress end , 
					   HardwareInstr * instrs , u32 start_index , u32 end_index , 
					   CircuitAddress ** trace_ptr , Mem * arena)
{
	
	u32 trace_capacity = 64;
	u32 trace_size = 0;
	CircuitAddress * trace = mem_push(arena , CircuitAddress , sizeof(CircuitAddress)*trace_capacity);
	
	// trace_stack holds, for every address on the path currently being
	// explored, its index inside `stack`.
	// NOTE: trail_ends is written below but never read in this function.
	u32 trace_stack[16] = {};
	u32 trail_ends[16] = {};
	u32 trace_stack_capacity = array_size(trace_stack);
	u32 trace_stack_size = 0;
	
	// Work stack of addresses still to be expanded, seeded with `end`.
	CircuitAddress stack[64] = {};
	u32 stack_capacity = array_size(stack);
	u32 stack_size = 1;//get_initial_circuit_elements(stack , stack_capacity , end , instrs , start_index , end_index);
	
	stack[0] = end;
	
	while(stack_size)
	{
		
		
		// Take the top address and append it to the current path.
		u32 top_stack_index = stack_size-1;
		u32 top_trace_index = trace_stack_size++;
		CircuitAddress top_addr = stack[top_stack_index];
		
		//HardwareInstrInfo * top_instr_info = get_hw_instr_info(top_instr.function);
		//CircuitAddress * top_inputs = top_instr.inputs;
		//u32 top_input_count = top_instr_info -> input_count;
		
		assert(trace_stack_size < trace_stack_capacity);
		trace_stack[top_trace_index] = top_stack_index;
		trail_ends[top_trace_index] = 1;
		
		// Reached the start with a longer path than the best so far: keep it.
		if(in_range(top_addr , start) && trace_stack_size > trace_size)
		{
			
			assert(trace_stack_size < trace_capacity);
			trace_size = trace_stack_size;
			
			// Copy in reverse so the most recently visited address comes first.
			for(u32 trace_index = 0; trace_index < trace_size; trace_index++)
			{
				trace[trace_size-(trace_index+1)] = stack[trace_stack[trace_index]];
			}
		}
		
		// Expand: push the inputs of every instruction that drives top_addr.
		for(u32 instr_index = start_index; instr_index < end_index; instr_index++)
		{
			
			HardwareInstr * curr_instr = instrs + instr_index;
			HardwareInstrInfo * curr_info = get_hw_instr_info(curr_instr -> function);
			CircuitAddress * outputs = curr_instr -> outputs;
			CircuitAddress * inputs = curr_instr -> inputs;
			
			u32 output_count = curr_info -> output_count;
			u32 input_count = curr_info -> input_count;
			
			if(find_addr(top_addr , outputs , output_count))
			{
				
				for(u32 input_index = 0; input_index < input_count; input_index++)
				{
					assert(stack_size < stack_capacity);
					stack[stack_size++] = inputs[input_index];
					// Keeps top_stack_index equal to the index of the last pushed entry.
					top_stack_index++;
				}
			}
		}
		
		// Backtrack: while the top of the work stack is also the last entry of
		// the current path (nothing left to explore beneath it), pop it from
		// both. This unwinds through every ancestor whose children are all
		// exhausted and stops at the next unexplored sibling.
		while(top_stack_index == trace_stack[top_trace_index])
		{
			
			stack_size = top_stack_index;
			trace_stack_size = top_trace_index;
			
			// Stop before the indices would wrap below zero.
			if(!top_stack_index || !top_trace_index)
			{
				break;
			}
			
			stack[top_stack_index--] = {};
			trace_stack[top_trace_index--] = {};
		}
	}
	
	*trace_ptr = trace;
	return(trace_size);
}
