HierarchyFilesModulesSignalsTasksFunctionsHelp
12
/****************************************************************
 ---------------------------------------------------------------
     Copyright 1999 Sun Microsystems, Inc., 901 San Antonio
     Road, Palo Alto, CA 94303, U.S.A.  All Rights Reserved.
     The contents of this file are subject to the current
     version of the Sun Community Source License, picoJava-II
     Core ("the License").  You may not use this file except
     in compliance with the License.  You may obtain a copy
     of the License by searching for "Sun Community Source
     License" on the World Wide Web at http://www.sun.com.
     See the License for the rights, obligations, and
     limitations governing use of the contents of this file.

     Sun, Sun Microsystems, the Sun logo, and all Sun-based
     trademarks and logos, Java, picoJava, and all Java-based
     trademarks and logos are trademarks or registered trademarks 
     of Sun Microsystems, Inc. in the United States and other
     countries.
 ----------------------------------------------------------------
******************************************************************/


[Up: iu ifu]
module ifu (

	ibuff_0,
	ibuff_1,
	ibuff_2,
	ibuff_3,
	ibuff_4,
	ibuff_5,
	ibuff_6,
	fetch_drty,
	fetch_valid,
	fetch_len0,
	fetch_len1,
	fetch_len2,
	fetch_len3,
	fetch_len4,
	fetch_len5,
	fetch_len6,
	iu_psr_fle,
	sc_bottom,
	lvars,
	squash_fold,
	kill_vld_d,
	hold_d,
	hold_r,
	hold_e,
	hold_c,
	iu_trap_c,
	sin,
	sm,	
	reset_l,
	clk,
	group_1_r,
	group_2_r,
	group_3_r,
	group_4_r,
	group_5_r,
	group_6_r,
	group_7_r,
	group_8_r,
	group_9_r,
	no_fold_r,
	fold_r,
	opcode_1_rs1_r,
	opcode_2_rs1_r,
	offset_1_rs1_r,
	offset_2_rs1_r,
	valid_rs1_r,
	help_rs1_r,
	lv_rs1_r,
	type_rs1_r,
	lvars_acc_rs1_r,
	st_index_op_rs1_r,
	reverse_ops_rs1_r,
	update_optop_r,
	opcode_1_rs2_r,
	opcode_2_rs2_r,
	offset_1_rs2_r,
	offset_2_rs2_r,
	valid_rs2_r,
	lv_rs2_r,
	lvars_acc_rs2_r,
//	Generate different versions of opcode_1_op to
// 	improve timing

	opcode_1_op_r_rcu,
	opcode_1_op_r_ex,
	opcode_1_op_r_fpu,
	opcode_1_op_r_ucode,

	opcode_2_op_r,
	opcode_3_op_r,
	opcode_4_op_r,
	opcode_5_op_r,
	valid_op_r_rcu,
	valid_op_r_ucode,
	valid_op_r_pipe,
	valid_op_r_ex,
	valid_op_r_fpu,
	opcode_1_rsd_r,
	opcode_2_rsd_r,
	offset_rsd_r,
	valid_rsd_r,
	offset_pc_br_r,
	drty_inst_r,
	put_field2_quick_r,
	iu_shift_d,
	so


);

input	[7:0]	ibuff_0;		// 1st entry of Ibuffer
input	[7:0]	ibuff_1;                // 2nd entry of Ibuffer		
input	[7:0]	ibuff_2;                // 3rd entry of Ibuffer
input	[7:0]	ibuff_3;                // 4th entry of Ibuffer
input	[7:0]	ibuff_4;                // 5th entry of Ibuffer
input	[7:0]	ibuff_5;                // 6th entry of Ibuffer
input	[7:0]	ibuff_6;                // 7th entry of Ibuffer
input	[6:0]	fetch_drty;		// Dirty bits of the 7 entries in Ibuffer
input	[6:0]	fetch_valid;		// Valid bits of the 7 entries in Ibuffer
input	[3:0]	fetch_len0;		// Len. of the instr. corresp. to 1st byte in Ibuffer
input	[3:0]	fetch_len1;             // Len. of the instr. corresp. to 2nd byte in Ibuffer
input	[3:0]	fetch_len2;             // Len. of the instr. corresp. to 3rd byte in Ibuffer
input	[3:0]	fetch_len3;             // Len. of the instr. corresp. to 4th byte in Ibuffer
input	[3:0]	fetch_len4;             // Len. of the instr. corresp. to 5th byte in Ibuffer
input	[3:0]	fetch_len5;             // Len. of the instr. corresp. to 6th byte in Ibuffer
input	[3:0]	fetch_len6;             // Len. of the instr. corresp. to 7th byte in Ibuffer
input		iu_psr_fle;		// Bit in PSR reg. to enable folding
input	[31:0]	sc_bottom;		// Stack Bottom Register
input	[31:0]	lvars;			// Local Var. Register
input		squash_fold;		// Signal from pipeline control to disable
					// folding.
input		kill_vld_d;		// Signal from pipeline control to kill valids
					// in the d-stage for branches and traps
input		hold_d;			// Hold for D-stage
input		hold_r;			// Hold for R-stage
input		hold_e;			// Hold for E-stage
input		hold_c;			// Hold for C-stage
input		iu_trap_c;		// Trap in C-stage
input		sin;			// IFU Scan In Port
input		sm;			// IFU Scan Enable Port
input		reset_l;			// Reset
input		clk;			// Clock
output		group_1_r;		// Indicates  group_1 folding
output		group_2_r;              // Indicates  group_2 folding
output		group_3_r;              // Indicates  group_3 folding
output		group_4_r;              // Indicates  group_4 folding
output		group_5_r;              // Indicates  group_5 folding
output		group_6_r;              // Indicates  group_6 folding
output		group_7_r;              // Indicates  group_7 folding
output		group_8_r;              // Indicates  group_8 folding
output		group_9_r;              // Indicates  group_9 folding
output		no_fold_r;		// Indicates that only one instr. is issued
output		fold_r;			// Indicates that more than one instr is issued
output	[7:0]	opcode_1_rs1_r;		// 1st byte of opcode in RS1 stage
output	[7:0]	opcode_2_rs1_r;		// 2nd byte of opcode in RS1 stage
output	[7:0]	offset_1_rs1_r;		// 1st byte of offset in RS1 stage
output	[7:0]	offset_2_rs1_r;		// 2nd byte of offset in RS1 stage
output		valid_rs1_r;		// Indicates a valid LV + Long Load in RS1
output		help_rs1_r;		// Indicates multi-rstage operation in RS1
output		lv_rs1_r;		// Indicates an LV op in RS1

output	[7:0]	type_rs1_r;		// indicates the type of long op in rs1
                                	// type[0] = long or double load
                                	// type[1] = long or double store
                                	// type[2] = long add, and, sub, or, xor or 
					// neg operation
                                	// type[3] = double add, sub, rem, mul, div 
					// or compare operation
                                	// type[4] = long shift left operation
                                	// type[5] = long shift right operation
                                	// type[6] = priv write operations
                                	// type[7] = long compares

output		lvars_acc_rs1_r;	// Indicates that a scache access in RS1 uses lvars 
output		st_index_op_rs1_r;	// Indicates that there's st_*_index type of op in RS1
output		reverse_ops_rs1_r;	// Used to fix an Xface problem between FPU and IU
output		update_optop_r;		// Indicates that there's a priv_update_optop op
output	[7:0]	opcode_1_rs2_r;		// 1st byte of opcode in RS2 stage
output	[7:0]	opcode_2_rs2_r;		// 2nd byte of opcode in RS2 stage
output	[7:0]	offset_1_rs2_r;		// 1st byte of offset in RS2 stage
output	[7:0]	offset_2_rs2_r;		// 2nd byte of offset in RS2 stage
output		valid_rs2_r;		// Indicates a valid LV op in RS2
output		lv_rs2_r;		// Indicates a valid LV in RS2
output		lvars_acc_rs2_r;	// Indicates that a scache access in RS2 uses lvars 
output	[7:0]	opcode_1_op_r_rcu;	// 1st byte of opcode in OP stage to be used in RCU
output	[7:0]	opcode_1_op_r_ex;	// 1st byte of opcode in OP stage to be used in EX
output	[7:0]	opcode_1_op_r_fpu;	// 1st byte of opcode in OP stage to be used in FPU
output	[7:0]	opcode_1_op_r_ucode;	// 1st byte of opcode in OP stage to be used in UCODE
output	[7:0]	opcode_2_op_r;		// 2nd byte of opcode in OP stage
output	[7:0]	opcode_3_op_r;		// 3rd byte of opcode in OP stage
output	[7:0]	opcode_4_op_r;		// 4th byte of opcode in OP stage
output	[7:0]	opcode_5_op_r;		// 5th byte of opcode in OP stage

// Generating multiple versions of valid_op_r to improve timing on it 

output		valid_op_r_rcu;		// Indicates a valid OP in OP stage: used in RCU
output		valid_op_r_ucode;	// Indicates a valid OP in OP stage: used in Ucode
output		valid_op_r_pipe;	// Indicates a valid OP in OP stage: used in Pipe
output		valid_op_r_ex;		// Indicates a valid OP in OP stage: used in EX
output		valid_op_r_fpu;		// Indicates a valid OP in OP stage: used in FPU

output	[7:0]	opcode_1_rsd_r;		// 1st byte of opcode in RSd stage
output	[7:0]	opcode_2_rsd_r;		// 2nd byte of opcode in RSd stage
output	[7:0]	offset_rsd_r;		// offset in RSd stage
output		valid_rsd_r;		// Indicates a valid MEM operation in RSd stage
output	[2:0]	offset_pc_br_r;		// Gives the offset from PC, where the OP is there
output		drty_inst_r;		// Indicates that first instruction is a dirty one
output		put_field2_quick_r;	// Indicates a put_field_quick operation
output	[7:0]	iu_shift_d;		// tells how many bytes are consumed by folding
output		so;			// IFU Scan Out Port



wire	[7:0]	accum_len0;
wire	[7:0]	accum_len1;
wire	[7:0]	accum_len2;
wire	[7:0]	accum_len3;
wire	[3:0]	dec_valid;
wire	[5:0]	ex_len_first_inst;
wire	[5:0]	inst_1_type;
wire	[5:0]	inst_2_type;
wire	[5:0]	inst_3_type;
wire	[5:0]	inst_4_type;
wire	[7:0]	offset_1_rs1_int;
wire	[7:0]	offset_1_rs2_int;
wire	[7:0]	opcode_1_rs1;
wire	[7:0]	opcode_2_rs1;
wire	[7:0]	opcode_1_rs2;
wire	[7:0]	opcode_2_rs2;
wire	[7:0]	opcode_1_op_r_gen;
wire	[2:0]	instrs_folded;
wire	[2:0]	instrs_folded_r;
wire	[2:0]	instrs_folded_e;
wire	[2:0]	instrs_folded_c;
wire	[2:0]	instrs_folded_w;
wire		valid_op_r_gen;
wire		reverse_ops_rs1;
wire		update_optop;
wire		put_field2_quick;
wire		fold;
wire		valid_rs1_int1;
wire		group_1;
wire		group_2;
wire		group_3;
wire		group_4;
wire		group_5;
wire		group_6;
wire		group_7;
wire		group_8;
wire		group_9;
wire		valid_rs1_int;
wire		lv_rs1_int;
wire		lvars_acc_rs1_int;
wire		st_index_op_rs1;
wire		valid_rs2_int;
wire		lv_rs2_int;
wire		lv_rs2_int1;
wire		lvars_acc_rs2_int1;
wire		lvars_acc_rs2_int;
wire		vld_drty_entries;
wire		scache_miss_int;
wire		scache_miss;
wire		fold_enable;
wire		fold_1_inst;
wire		fold_2_inst;
wire		fold_3_inst;
wire		fold_4_inst;
wire		not_valid;
wire	[4:0]	offset_sel_rs1;
wire		valid_rs1;
wire		mem_op_rs1;
wire		help_rs1;
wire	[7:0]	type_rs1;
wire		lv_rs1;
wire	[4:0]	offset_sel_rs2;
wire	[7:0]	offset_1_rs1;
wire	[7:0]	offset_1_rs2;
wire	[7:0]	offset_2_rs1;
wire	[7:0]	offset_2_rs2;
wire		valid_rs2;
wire	[2:0]	op_sel;
wire	[7:0]	opcode_inst2_1;
wire	[7:0]	opcode_inst2_2;
wire	[7:0]	opcode_inst2_3;
wire	[7:0]	opcode_inst3_1;
wire	[7:0]	opcode_inst3_2;
wire	[7:0]	opcode_inst3_3;
wire	[7:0]	opcode_inst4_1;
wire	[7:0]	opcode_inst4_2;
wire	[7:0]	opcode_inst4_3;
wire	[7:0]	opcode_1_op;
wire	[7:0]	opcode_2_op;
wire	[7:0]	opcode_3_op;
wire		valid_op;
wire		valid_rsd;
wire	[7:0]	rs2_inst_1;
wire	[7:0]	rs2_inst_2;
wire	[7:0]	rs2_inst_3;
wire	[3:0]	opcode_sel_rsd;
wire	[7:0]	opcode_1_rsd;
wire	[7:0]	opcode_2_rsd;
wire	[7:0]	opcode_3_rsd;
wire	[7:0]	offset_rsd_int;
wire	[7:0]	offset_rsd;
wire	[3:0]	offset_rsd_ctl;
wire	[4:0]	offset_sel_rsd;
wire	[7:0]	offset_pc_br_dec;
wire	[2:0]	offset_pc_br;
wire	[1:0]	swap_rs1_rs2;
wire		drty_inst;
wire		drty_inst_1;
wire		drty_inst_2;
wire		drty_inst_3;
wire		drty_inst_4;
wire		drty_inst_5;
wire		lv_rs2;
wire		lvars_acc_rs1;
wire		lvars_acc_rs2;


// Exact length of the first instruction.
// ex_len_dec is fed the opcode byte at the head of the Ibuffer (ibuff_0)
// and the valid bits of the first five Ibuffer entries; each of its
// len0..len5 outputs drives the matching bit of ex_len_first_inst.

ex_len_dec ex_len_dec (
    .opcode (ibuff_0),
    .valid  (fetch_valid[4:0]),
    .len5   (ex_len_first_inst[5]),
    .len4   (ex_len_first_inst[4]),
    .len3   (ex_len_first_inst[3]),
    .len2   (ex_len_first_inst[2]),
    .len1   (ex_len_first_inst[1]),
    .len0   (ex_len_first_inst[0])
);
				
// Accumulated length detector.
// Produces four running lengths, one per instruction that may take part
// in a fold:
//   accum_len0 -> length of the 1st instr
//   accum_len1 -> length of the 1st instr + length of the 2nd instr
//   accum_len2 -> lengths of the 1st..3rd instr (by the same pattern;
//                 confirm against length_dec)
//   accum_len3 -> lengths of the 1st..4th instr (by the same pattern;
//                 confirm against length_dec)
// The block also receives the fold decision (fold_1_inst..fold_4_inst,
// not_valid), the exact length of the first instruction and hold_d, and
// it drives the iu_shift_d output of this module.

length_dec length_dec (
    // per-byte instruction lengths from the Ibuffer
    .fetch_len0        (fetch_len0),
    .fetch_len1        (fetch_len1),
    .fetch_len2        (fetch_len2),
    .fetch_len3        (fetch_len3),
    .fetch_len4        (fetch_len4),
    .fetch_len5        (fetch_len5),
    .fetch_len6        (fetch_len6),
    .ex_len_first_inst (ex_len_first_inst),
    // fold decision fed back from fold_logic
    .fold_1_inst       (fold_1_inst),
    .fold_2_inst       (fold_2_inst),
    .fold_3_inst       (fold_3_inst),
    .fold_4_inst       (fold_4_inst),
    .not_valid         (not_valid),
    .hold_d            (hold_d),
    // results
    .iu_shift_d        (iu_shift_d[7:0]),
    .accum_len0        (accum_len0),
    .accum_len1        (accum_len1),
    .accum_len2        (accum_len2),
    .accum_len3        (accum_len3)
);

// Valid decoder.
// Outputs four decoder valids, dec_valid[3:0], corresponding to the four
// prospective instructions to be folded.  Note that accum_len3 and
// fetch_len0 are not connected to this block.

valid_dec valid_dec (
    .fetch_valid       (fetch_valid),
    .ex_len_first_inst (ex_len_first_inst),
    .accum_len0        (accum_len0),
    .accum_len1        (accum_len1),
    .accum_len2        (accum_len2),
    .fetch_len1        (fetch_len1),
    .fetch_len2        (fetch_len2),
    .fetch_len3        (fetch_len3),
    .fetch_len4        (fetch_len4),
    .fetch_len5        (fetch_len5),
    .fetch_len6        (fetch_len6),
    .dec_valid         (dec_valid)
);
				

// Folding decoder.
// Outputs the folding type of each of the four prospective instructions
// to be folded.  The decoder ports type_0..type_3 drive the nets
// inst_1_type..inst_4_type respectively (1st..4th instruction).
//
// Each 6-bit type vector is encoded as:
//   [0] = Non Foldable
//   [1] = Local Variable
//   [2] = Operation
//   [3] = Break Group2
//   [4] = Break Group1
//   [5] = Memory Store

fold_dec fold_dec (
    .ibuff_0    (ibuff_0),
    .ibuff_1    (ibuff_1),
    .ibuff_2    (ibuff_2),
    .ibuff_3    (ibuff_3),
    .ibuff_4    (ibuff_4),
    .ibuff_5    (ibuff_5),
    .ibuff_6    (ibuff_6),
    .accum_len0 (accum_len0),
    .accum_len1 (accum_len1),
    .accum_len2 (accum_len2),
    .type_3     (inst_4_type),
    .type_2     (inst_3_type),
    .type_1     (inst_2_type),
    .type_0     (inst_1_type)
);
				

// Folding enable.
// Folding is disabled if any of the following holds:
//   1. iu_psr_fle is 0
//   2. there is an scache miss and RS1 and/or RS2 accesses the scache
//      through lvars
//   3. there is any valid dirty entry in the ibuff
//   4. pipeline control asserts squash_fold

// Scache miss detection.
// There is no time to compute the exact offset, subtract it from lvars
// and compare the result against sc_bottom, so a slightly pessimistic
// approach is used: lvars itself is compared with sc_bottom.
// (.gr is presumably "in1 greater than in2" -- confirm against comp_gr_32.)

comp_gr_32 scache_miss_comp (
    .in1 (lvars),
    .in2 (sc_bottom),
    .gr  (scache_miss_int)
);

// The raw compare only counts as a miss when RS1 or RS2 actually performs
// a local-variable access.  This is the optimization that lets folding
// ignore the scache compare when neither RS1 nor RS2 touches the scache,
// which greatly improves the number of instructions folded.
// The RS2 term is the unqualified lvars_acc_rs2_int1: the qualified
// lvars_acc_rs2_int is gated by fold_1_inst, which is itself produced by
// fold_logic from fold_enable, so using it here would presumably close a
// combinational loop.
assign scache_miss = (lvars_acc_rs1_int | lvars_acc_rs2_int1) & scache_miss_int;

// Do not fold if any of the top 7 bytes of the ibuffer is both valid and
// dirty.
assign vld_drty_entries = |(fetch_valid[6:0] & fetch_drty[6:0]);

// An earlier version of this equation used a scache_hit term in place of
// !scache_miss.
assign fold_enable = iu_psr_fle &
                     !(scache_miss | vld_drty_entries | squash_fold);

// Folding logic: decides how many instructions are folded and which
// group the folded instructions fall into.
//   group_1 : LV LV OP MEM
//   group_2 : LV LV OP
//   group_3 : LV LV BG2
//   group_4 : LV OP MEM
//   group_5 : LV BG2
//   group_6 : LV BG1
//   group_7 : LV OP
//   group_8 : LV MEM
//   group_9 : OP MEM
// (group_1 is listed as LV LV OP MEM because, elsewhere in this module,
// the OP-stage select takes the 3rd instruction and the RSd select takes
// the 4th instruction for group_1.)

fold_logic fold_logic (
    // folding types and valids of the four candidate instructions
    .F0       (inst_1_type),
    .V0       (dec_valid[0]),
    .F1       (inst_2_type),
    .V1       (dec_valid[1]),
    .F2       (inst_3_type),
    .V2       (dec_valid[2]),
    .F3       (inst_4_type),
    .V3       (dec_valid[3]),
    .FOE      (fold_enable),
    // number of instructions issued
    .notvalid (not_valid),
    .fold1    (fold_1_inst),
    .fold2    (fold_2_inst),
    .fold3    (fold_3_inst),
    .fold4    (fold_4_inst),
    // folding group
    .gr1      (group_1),
    .gr2      (group_2),
    .gr3      (group_3),
    .gr4      (group_4),
    .gr5      (group_5),
    .gr6      (group_6),
    .gr7      (group_7),
    .gr8      (group_8),
    .gr9      (group_9)
);

// Register the group / fold information for the R stage.
// Every flop here follows the same scheme: the data input is forced low
// by kill_vld_d, and the enable is active when hold_r is low or when
// kill_vld_d is high, so a kill is captured even while R is held.

ff_sre flop_gr_1 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_1 & !kill_vld_d), .out(group_1_r));

ff_sre flop_gr_2 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_2 & !kill_vld_d), .out(group_2_r));

ff_sre flop_gr_3 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_3 & !kill_vld_d), .out(group_3_r));

ff_sre flop_gr_4 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_4 & !kill_vld_d), .out(group_4_r));

ff_sre flop_gr_5 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_5 & !kill_vld_d), .out(group_5_r));

ff_sre flop_gr_6 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_6 & !kill_vld_d), .out(group_6_r));

ff_sre flop_gr_7 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_7 & !kill_vld_d), .out(group_7_r));

ff_sre flop_gr_8 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_8 & !kill_vld_d), .out(group_8_r));

ff_sre flop_gr_9 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(group_9 & !kill_vld_d), .out(group_9_r));

// fold: two, three or four instructions are issued together.
assign fold = fold_4_inst | fold_3_inst | fold_2_inst;

ff_sre flop_fold (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                  .din(fold & !kill_vld_d), .out(fold_r));

// no_fold: exactly one instruction is issued.
ff_sre flop_no_fold (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                     .din(fold_1_inst & !kill_vld_d), .out(no_fold_r));
			
		
// Main decoder: provides the offset selects for RS1, RS2 and RSd, the
// RS1 valid / help / type information, and the local-variable
// indications for RS1 and RS2.

main_dec main_dec (
    // Ibuffer bytes, their valid bits and the running lengths
    .ibuff_0         (ibuff_0),
    .ibuff_1         (ibuff_1),
    .ibuff_2         (ibuff_2),
    .ibuff_3         (ibuff_3),
    .ibuff_4         (ibuff_4),
    .ibuff_5         (ibuff_5),
    .ibuff_6         (ibuff_6),
    .fetch_valid     (fetch_valid[6:0]),
    .accum_len0      (accum_len0),
    .accum_len1      (accum_len1),
    .accum_len2      (accum_len2),
    // RS1
    .offset_sel_rs1  (offset_sel_rs1),
    .valid_rs1       (valid_rs1_int1),
    .mem_op          (mem_op_rs1),
    .help_rs1        (help_rs1),
    .type            (type_rs1),
    .lv_rs1          (lv_rs1_int),
    .lvars_acc_rs1   (lvars_acc_rs1_int),
    .st_index_op_rs1 (st_index_op_rs1),
    .reverse_ops_rs1 (reverse_ops_rs1),
    .update_optop    (update_optop),
    // RS2
    .offset_sel_rs2  (offset_sel_rs2),
    .lv_rs2          (lv_rs2_int1),
    .lvars_acc_rs2   (lvars_acc_rs2_int1),
    // RSd
    .offset_rsd_ctl  (offset_rsd_ctl),
    .offset_sel_rsd  (offset_sel_rsd)
);

// When only a single instruction is issued (fold_1_inst), force the RS2
// local-variable indications to zero.  Without this gating, combinations
// such as "lcmp lload" or "lcmp iload" leave lvars_acc_rs2_int1 and
// lv_rs2_int1 high even though nothing is folded into RS2.
assign lvars_acc_rs2_int = !fold_1_inst & lvars_acc_rs2_int1;
assign lv_rs2_int        = !fold_1_inst & lv_rs2_int1;

// RS1 stage: opcode and offset bytes.

// Imp. Note1
// Because of an architectural oversight, for the groups
//   LV LV OP MEM -> group 1
//   LV LV OP     -> group 2
//   LV LV BG2    -> group 3
// the first LV used to be sent to RS1 and the second LV to RS2, which is
// not correct; it has to be the other way around.  To incorporate this
// change the inputs to RS1 and RS2 are swapped for these three groups.
//
// swap_rs1_rs2 is the 2-bit select of every swap mux: bit 1 is set for
// groups 1..3, bit 0 is its complement.  (Presumably bit 1 picks in1 and
// bit 0 picks in0 -- confirm against the mux2 cells.)

assign swap_rs1_rs2[1] =  (group_1 | group_2 | group_3);
assign swap_rs1_rs2[0] = !(group_1 | group_2 | group_3);

// ---- 1st opcode byte ----
mux2_8 mux_opcode_1_rs1 (
    .sel (swap_rs1_rs2),
    .in0 (ibuff_0),
    .in1 (rs2_inst_1),
    .out (opcode_1_rs1)
);

ff_se_8 flop_opcode_1_rs1 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (opcode_1_rs1),
    .out    (opcode_1_rs1_r)
);

// ---- 2nd opcode byte ----
mux2_8 mux_opcode_2_rs1_swap (
    .sel (swap_rs1_rs2),
    .in0 (ibuff_1),
    .in1 (rs2_inst_2),
    .out (opcode_2_rs1)
);

ff_se_8 flop_opcode_2_rs1 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (opcode_2_rs1),
    .out    (opcode_2_rs1_r)
);

// ---- 1st offset byte ----
// Un-swapped RS1 offset: one of the constants 0..3 or the byte following
// the first opcode, as chosen by offset_sel_rs1 from the main decoder.
mux5_8 mux_offset_rs1 (
    .sel (offset_sel_rs1),
    .in0 (8'h00),
    .in1 (8'h01),
    .in2 (8'h02),
    .in3 (8'h03),
    .in4 (ibuff_1),
    .out (offset_1_rs1_int)
);

// NOTE: despite its instance name, this mux is the RS1/RS2 swap for the
// 1st offset byte (it drives offset_1_rs1).
mux2_8 mux_opcode_2_rs1 (
    .sel (swap_rs1_rs2),
    .in0 (offset_1_rs1_int),
    .in1 (offset_1_rs2_int),
    .out (offset_1_rs1)
);

ff_se_8 flop_offset_1_rs1 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (offset_1_rs1),
    .out    (offset_1_rs1_r)
);

// ---- 2nd offset byte ----
// NOTE: despite the "rs2" in its instance name, this mux drives
// offset_2_rs1.
mux2_8 mux_offset_2_rs2_swap (
    .sel (swap_rs1_rs2),
    .in0 (ibuff_2),
    .in1 (rs2_inst_3),
    .out (offset_2_rs1)
);

ff_se_8 flop_offset_2_rs1 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (offset_2_rs1),
    .out    (offset_2_rs1_r)
);

// RS1 stage: valid, help, lv, lvars_acc, type and related qualifiers.
//
// The valid / lv / lvars_acc signals go through the RS1/RS2 swap
// described in Imp. Note1.
// kill_vld_d kills everything registered here: each data input is gated
// low by kill_vld_d, and each enable is active when hold_r is low or
// kill_vld_d is high, so the kill is captured even while R is held.

assign valid_rs1_int = !kill_vld_d & valid_rs1_int1;

mux2 mux_valid_rs1 (
    .sel (swap_rs1_rs2),
    .in0 (valid_rs1_int),
    .in1 (valid_rs2_int),
    .out (valid_rs1)
);

// Both mux inputs are already gated by kill_vld_d, so din needs no
// further gating.
ff_sre flop_valid_rs1 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                       .din(valid_rs1), .out(valid_rs1_r));

ff_sre flop_help_rs1 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                      .din(help_rs1 & !kill_vld_d), .out(help_rs1_r));

mux2 mux_lv_rs1 (
    .sel (swap_rs1_rs2),
    .in0 (lv_rs1_int),
    .in1 (lv_rs2_int),
    .out (lv_rs1)
);

ff_sre flop_lv_rs1 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                    .din(lv_rs1 & !kill_vld_d), .out(lv_rs1_r));

// Look at Imp. Note1
mux2 mux_lvacc_rs1 (
    .sel (swap_rs1_rs2),
    .in0 (lvars_acc_rs1_int),
    .in1 (lvars_acc_rs2_int),
    .out (lvars_acc_rs1)
);

ff_sre flop_lv_acc_rs1 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                        .din(lvars_acc_rs1 & !kill_vld_d), .out(lvars_acc_rs1_r));

// All eight type bits are cleared together on a kill.
ff_sre_8 flop_type_rs1 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                        .din(type_rs1 & {8{!kill_vld_d}}), .out(type_rs1_r));

ff_sre flop_rev_ops (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                     .din(reverse_ops_rs1 & !kill_vld_d), .out(reverse_ops_rs1_r));

ff_sre flop_st_op (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                   .din(st_index_op_rs1 & !kill_vld_d), .out(st_index_op_rs1_r));

ff_sre flop_optop (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                   .din(update_optop & !kill_vld_d), .out(update_optop_r));

// RS2 stage: opcode and offset bytes.

// Pick the first three bytes of the instruction that follows the first
// one.  The select is accum_len0[3:0], i.e. the length of the first
// instruction; select bit 0 yields all zeroes.
// (accum_len0 is presumably one-hot -- confirm against length_dec.)

mux4_24 mux_rs2_inst (
    .sel (accum_len0[3:0]),
    .in0 (24'b0),
    .in1 ({ibuff_1, ibuff_2, ibuff_3}),
    .in2 ({ibuff_2, ibuff_3, ibuff_4}),
    .in3 ({ibuff_3, ibuff_4, ibuff_5}),
    .out ({rs2_inst_1, rs2_inst_2, rs2_inst_3})
);

// ---- 1st opcode byte (swapped with RS1 for groups 1..3, see Imp. Note1) ----
mux2_8 mux_opcode_1_rs2 (
    .sel (swap_rs1_rs2),
    .in0 (rs2_inst_1),
    .in1 (ibuff_0),
    .out (opcode_1_rs2)
);

ff_se_8 flop_opcode_1_rs2 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (opcode_1_rs2),
    .out    (opcode_1_rs2_r)
);

// ---- 2nd opcode byte ----
mux2_8 mux_opcode_2_rs2_swap (
    .sel (swap_rs1_rs2),
    .in0 (rs2_inst_2),
    .in1 (ibuff_1),
    .out (opcode_2_rs2)
);

ff_se_8 flop_opcode_2_rs2 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (opcode_2_rs2),
    .out    (opcode_2_rs2_r)
);

// ---- 1st offset byte ----
// Un-swapped RS2 offset: one of the constants 0..3 or the byte following
// the second instruction's opcode, as chosen by offset_sel_rs2.
mux5_8 mux_offset_rs2 (
    .sel (offset_sel_rs2),
    .in0 (8'h00),
    .in1 (8'h01),
    .in2 (8'h02),
    .in3 (8'h03),
    .in4 (rs2_inst_2),
    .out (offset_1_rs2_int)
);

// NOTE: despite the "2" in its instance name, this mux is the RS1/RS2
// swap for the 1st offset byte (it drives offset_1_rs2).
mux2_8 mux_offset_2_rs2 (
    .sel (swap_rs1_rs2),
    .in0 (offset_1_rs2_int),
    .in1 (offset_1_rs1_int),
    .out (offset_1_rs2)
);

ff_se_8 flop_offset_1_rs2 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (offset_1_rs2),
    .out    (offset_1_rs2_r)
);

// ---- 2nd offset byte ----
// NOTE: despite its instance name, this mux drives offset_2_rs2.
mux2_8 mux_opcode_2_rs2 (
    .sel (swap_rs1_rs2),
    .in0 (rs2_inst_3),
    .in1 (ibuff_2),
    .out (offset_2_rs2)
);

ff_se_8 flop_offset_2_rs2 (
    .clk    (clk),
    .enable (!hold_r),
    .din    (offset_2_rs2),
    .out    (offset_2_rs2_r)
);

// RS2 stage: valid, lv and lvars_acc.
//
// Before the swap there is a valid RS2 only for groups 1, 2 and 3.
// kill_vld_d kills all valids: the data inputs are gated low by
// kill_vld_d, and the flop enables are active when hold_r is low or
// kill_vld_d is high, so the kill is captured even while R is held.

assign valid_rs2_int = !kill_vld_d & (group_1 | group_2 | group_3);

// Look at Imp. Note1
mux2 mux_valid_rs2 (
    .sel (swap_rs1_rs2),
    .in0 (valid_rs2_int),
    .in1 (valid_rs1_int),
    .out (valid_rs2)
);

// Both mux inputs are already gated by kill_vld_d.
ff_sre flop_vld_rs2 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                     .din(valid_rs2), .out(valid_rs2_r));

mux2 mux_lv_rs2 (
    .sel (swap_rs1_rs2),
    .in0 (lv_rs2_int),
    .in1 (lv_rs1_int),
    .out (lv_rs2)
);

ff_sre flop_lv_rs2 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                    .din(lv_rs2 & !kill_vld_d), .out(lv_rs2_r));

// Look at Imp. Note1
mux2 mux_lvacc_rs2 (
    .sel (swap_rs1_rs2),
    .in0 (lvars_acc_rs2_int),
    .in1 (lvars_acc_rs1_int),
    .out (lvars_acc_rs2)
);

ff_sre flop_lvars_acc_rs2 (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                           .din(lvars_acc_rs2 & !kill_vld_d), .out(lvars_acc_rs2_r));




// Operation (OP) stage: opcode selection.
//
// Depending on the folding group, the OP is the 1st, 2nd or 3rd
// instruction of the group:
//   op_sel[2] : groups 1..3 -> OP is the 3rd instruction
//   op_sel[1] : groups 4..8 -> OP is the 2nd instruction
//   op_sel[0] : otherwise   -> OP is the 1st instruction

assign op_sel[2] = group_1 | group_2 | group_3;
assign op_sel[1] = group_4 | group_5 | group_6 | group_7 | group_8;
assign op_sel[0] = !(op_sel[2] | op_sel[1]);

// Candidate bytes if the OP is the 2nd instruction: the three bytes that
// start right after the first instruction (selected by accum_len0[3:0];
// same inputs and select as mux_rs2_inst).
mux4_24 mux_op_2 (
    .sel (accum_len0[3:0]),
    .in0 (24'b0),
    .in1 ({ibuff_1, ibuff_2, ibuff_3}),
    .in2 ({ibuff_2, ibuff_3, ibuff_4}),
    .in3 ({ibuff_3, ibuff_4, ibuff_5}),
    .out ({opcode_inst2_1, opcode_inst2_2, opcode_inst2_3})
);

// Candidate bytes if the OP is the 3rd instruction: the three bytes that
// start right after the first two instructions (selected by accum_len1).
// Bytes beyond ibuff_6 are not available here and are filled with zeroes.
mux8_24 mux_op_3 (
    .sel (accum_len1),
    .in0 (24'b0),
    .in1 ({ibuff_1, ibuff_2, ibuff_3}),
    .in2 ({ibuff_2, ibuff_3, ibuff_4}),
    .in3 ({ibuff_3, ibuff_4, ibuff_5}),
    .in4 ({ibuff_4, ibuff_5, ibuff_6}),
    .in5 ({ibuff_5, ibuff_6, 8'b0}),
    .in6 ({ibuff_6, 16'b0}),
    .in7 (24'b0),
    .out ({opcode_inst3_1, opcode_inst3_2, opcode_inst3_3})
);

// Final choice between the 1st, 2nd and 3rd instruction.
mux3_24 mux_op (
    .sel (op_sel),
    .in0 ({ibuff_0, ibuff_1, ibuff_2}),
    .in1 ({opcode_inst2_1, opcode_inst2_2, opcode_inst2_3}),
    .in2 ({opcode_inst3_1, opcode_inst3_2, opcode_inst3_3}),
    .out ({opcode_1_op, opcode_2_op, opcode_3_op})
);

// OP stage: opcode registers.

// On iu_trap_c the first opcode byte of OP has to be reset to NOP.  The
// reason is that trap_r (trap_c delayed by one flop) activates
// valid_op_r in the R stage, and whatever opcode sits in R would be
// treated as a genuine one when it moves to E.
// The trap is therefore wired to this flop's reset pin.
// NOTE(review): the chip reset_l is not part of this reset -- confirm
// that this is intentional.  The "NOP" value relies on ff_sre_8
// resetting to zero -- confirm against the cell.

ff_sre_8 flop_opcode_1_op (
    .clk     (clk),
    .reset_l (!iu_trap_c),
    .enable  (!hold_r),
    .din     (opcode_1_op),
    .out     (opcode_1_op_r_gen)
);

// One output name per consumer (RCU, EX, FPU, UCODE), kept separate to
// improve timing; within this module all four are driven from the same
// flop output.

assign opcode_1_op_r_rcu   = opcode_1_op_r_gen;
assign opcode_1_op_r_ex    = opcode_1_op_r_gen;
assign opcode_1_op_r_fpu   = opcode_1_op_r_gen;
assign opcode_1_op_r_ucode = opcode_1_op_r_gen;

// Bytes 2 and 3 come from the OP select mux.
ff_se_8 flop_opcode_2_op (
    .clk    (clk),
    .enable (!hold_r),
    .din    (opcode_2_op),
    .out    (opcode_2_op_r)
);

ff_se_8 flop_opcode_3_op (
    .clk    (clk),
    .enable (!hold_r),
    .din    (opcode_3_op),
    .out    (opcode_3_op_r)
);

// Bytes 4 and 5 are always taken straight from ibuff_3 / ibuff_4,
// independent of op_sel.
ff_se_8 flop_opcode_4_op (
    .clk    (clk),
    .enable (!hold_r),
    .din    (ibuff_3),
    .out    (opcode_4_op_r)
);

ff_se_8 flop_opcode_5_op (
    .clk    (clk),
    .enable (!hold_r),
    .din    (ibuff_4),
    .out    (opcode_5_op_r)
);

// OP stage: valid.
// The OP valid must be high whenever something is dispatched, whether it
// is a folded group or a single instruction, so it follows dec_valid[0].
// kill_vld_d kills it, and the kill is captured even while hold_r is
// asserted (the enables are active when hold_r is low or kill_vld_d is
// high).

assign valid_op = !kill_vld_d & dec_valid[0];

// valid_op_r is replicated to improve timing: RCU and ucode each get a
// flop of their own.

ff_sre flop_vld_op_rcu (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                        .din(valid_op), .out(valid_op_r_rcu));

ff_sre flop_vld_op_ucode (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                          .din(valid_op), .out(valid_op_r_ucode));

ff_sre flop_vld_op_gen (.clk(clk), .reset_l(reset_l), .enable(!hold_r | kill_vld_d),
                        .din(valid_op), .out(valid_op_r_gen));

// valid_op_r_ex, valid_op_r_fpu and valid_op_r_pipe are not very
// critical, so they share the general-purpose flop instead of getting
// separate ones.

assign valid_op_r_ex   = valid_op_r_gen;
assign valid_op_r_pipe = valid_op_r_gen;
assign valid_op_r_fpu  = valid_op_r_gen;

// Offset of the OP from the PC.
// Example: for "LV LV JMP02 X" the PC location of JMP02 must be known so
// that the branch offset (02) can be added to it to form the destination
// address.
//   no folding            -> the PC of the first instruction is the PC of
//                            the branch (offset 0)
//   OP is 2nd instruction -> offset is accum_len0
//   OP is 3rd instruction -> offset is accum_len1

mux3_8 mux_offset_br (
    .sel (op_sel),
    .in0 (8'b00000001),
    .in1 (accum_len0),
    .in2 (accum_len1),
    .out (offset_pc_br_dec)
);

// Encode the 8-bit decoded offset into 3 bits: bit k of offset_pc_br_dec
// contributes the binary value k (bit 0 -> 3'b000).

assign offset_pc_br = {
    (offset_pc_br_dec[7] | offset_pc_br_dec[6] |
     offset_pc_br_dec[5] | offset_pc_br_dec[4]),
    (offset_pc_br_dec[7] | offset_pc_br_dec[6] |
     offset_pc_br_dec[3] | offset_pc_br_dec[2]),
    (offset_pc_br_dec[7] | offset_pc_br_dec[5] |
     offset_pc_br_dec[3] | offset_pc_br_dec[1])
};

ff_sre_3 flop_offset_br (
    .clk     (clk),
    .reset_l (reset_l),
    .enable  (!hold_r),
    .din     (offset_pc_br),
    .out     (offset_pc_br_r)
);

// Opcode, offset and valid for RSd stage

// opcode first
// opcode for RSd can be 
// . 4th instr for group_1
// . 3rd instr for group_4
// . 2nd instr for groups 8 & 9
// . 1st otherwise

assign	opcode_sel_rsd[3] = group_1;
assign	opcode_sel_rsd[2] = group_4;
Next12
HierarchyFilesModulesSignalsTasksFunctionsHelp

This page: Created:Wed Mar 24 09:45:06 1999
From: /import/jet-pj2-sim/rahim/picoJava-II/design/iu/ifu/rtl/ifu.v

Verilog converted to html by v2html 5.0 (written by Costas Calamvokis).Help