HierarchyFilesModulesSignalsTasksFunctionsHelp
12
/****************************************************************
 ---------------------------------------------------------------
     Copyright 1999 Sun Microsystems, Inc., 901 San Antonio
     Road, Palo Alto, CA 94303, U.S.A.  All Rights Reserved.
     The contents of this file are subject to the current
     version of the Sun Community Source License, picoJava-II
     Core ("the License").  You may not use this file except
     in compliance with the License.  You may obtain a copy
     of the License by searching for "Sun Community Source
     License" on the World Wide Web at http://www.sun.com.
     See the License for the rights, obligations, and
     limitations governing use of the contents of this file.

     Sun, Sun Microsystems, the Sun logo, and all Sun-based
     trademarks and logos, Java, picoJava, and all Java-based
     trademarks and logos are trademarks or registered trademarks 
     of Sun Microsystems, Inc. in the United States and other
     countries.
 ----------------------------------------------------------------
******************************************************************/
[Up: ex ex_dpath]
module ex_dpath(carry_in_e,
		adder2_carry_in,
		imdr_data_e,
		pc_r,
		pc_c,
		pc_e,
		vars_out,
		ru_rs1_e,
		ru_rs2_e,
		scache_miss_addr_e,
		ucode_porta_e,
		ucode_portc_e,
		iu_alu_a,
		iu_alu_b,
		u_m_adder_porta_e,
		u_m_adder_portb_e,
		reg_data_e,
		opcode_r,
		dcu_diag_data_c,
		fpu_data_e,
		fpu_hold,
		icu_diag_data_c,
		dcu_data_c,

		rs1_bypass_mux_out,
		rs2_bypass_mux_out,
		iu_data_in,
		ucode_porta_mux_out,
		adder_out_e,
		carry_out_e,
		cmp_gt_e,
		cmp_eq_e,
		cmp_lt_e,
		null_objref_e,
		iu_addr_e,
  		iu_addr_e_2,
		iu_addr_e_31,
		iu_br_pc_e,
		iu_data_e,
		wr_optop_data_e,
		iu_addr_c,
		alu_out_w,
		dcu_data_w,
		load_buffer_mux_out_0,

		shift_dir_e,
		shift_count_e,
		sign_e,
	/******************** monitor caching change *****************/
		monitorenter_e,
			
		iu_data_vld,
		sc_data_vld,
		hold_e,
		hold_c,
		dcu_data_reg_we,
		iu_rs1_e_0_l,
		iu_rs2_e_0_l,

		ucode_active,
		rs1_mux_sel_din,
		forward_c_sel_din,
		ex_adder2_src1_mux_sel,
		rs1_bypass_mux_sel,
		rs2_bypass_mux_sel,
		cmp_mux_sel,
		adder_src1_mux_sel,
		adder_src2_mux_sel,
		shifter_src1_mux_sel,
		shifter_src2_mux_sel,
		shifter_word_sel,
		bit_mux_sel,
		cvt_mux_sel,
		bit_cvt_mux_sel,
		constant_mux_sel,
		alu_out_mux_sel,
		offset_mux_sel,
		adder2_src2_mux_sel,
		iu_br_pc_mux_sel,
		iu_data_mux_sel,
		wr_optop_mux_sel,
		load_data_c_mux_sel,
		fpu_mux_sel,
		forward_w_sel_din,
		load_buffer_mux_sel,

// All of the following signals are added to prevent RS1 and RS2 from
// going from EX -> UCODE -> EX; Timing Improvement 

		iu_optop_e,
		ucode_reg_data,
		ucode_madder_sel2,
		ucode_alu_a_sel,
		ucode_alu_b_sel,
		ucode_porta_sel,
		ucode_busy_e,
		sc_dcache_req,
		

		reset_l,
		clk,
		sm,
		sin,
		so);

   // Data buses
   input	 carry_in_e;		// Carry input to the main adder
   input 	 adder2_carry_in;	// Carry input to the address adder
   input [31:0]  imdr_data_e;		// Result of IMDR unit
   input [31:0]  pc_r;			// R stage PC value
   input [31:0]  pc_e;			// E stage PC value
   input [31:0]  pc_c;			// C stage PC value
   input [31:0]  vars_out;		// C stage VARS value from ex_regs
   input [31:0]  ru_rs1_e;		// Operand 1 value from R unit
   input [31:0]  ru_rs2_e;		// Operand 2 value from R unit
   input [31:0]  scache_miss_addr_e;	// S$ miss address to adder2
   input [31:0]  ucode_porta_e;		// Operand 1 value from ucode
   input [31:0]  ucode_portc_e;		// Result of ucode computation
   input [31:0]  iu_alu_a;		// ucode input to alu adder
   input [31:0]  iu_alu_b;		// ucode input to alu adder
   input [31:0]  u_m_adder_porta_e;	// ucode address 1 to adder2
   input [31:0]  u_m_adder_portb_e;	// ucode address 2 to adder2
   input [31:0]  reg_data_e;		// Output of register mux
   input [31:0]  opcode_r;		/* Coming from IFU
					 * 31:24 - opcode_2_op_r
					 * 23:16 - opcode_3_op_r
					 * 15: 8 - opcode_4_op_r
					 *  7: 0 - opcode_5_op_r
					 */
   input [31:0]  dcu_diag_data_c;	// DCU diagnostic read data
   input [31:0]  fpu_data_e;		// FPU result
   input  	 fpu_hold;		// Hold the fpu data
   input [31:0]  icu_diag_data_c;	// ICU diagnostic read data
   input [31:0]  dcu_data_c;		// Input to dcu_hold_reg

   output [31:0] rs1_bypass_mux_out;	// Output of rs1_bypass_mux
   output [31:0] rs2_bypass_mux_out;	// Output of rs2_bypass_mux
   output [31:0] ucode_porta_mux_out;	// Output of ucode_porta_mux
   output [31:0] iu_data_in;		// Data to be written onto SMU's version
                                        // of sbase in case of wr_sbase opcodes
   output [31:0] adder_out_e;		// Output of main adder
   output 	 carry_out_e;		// Carry output of main adder
   output 	 cmp_gt_e;		// Operand 1 >  operand 2
   output 	 cmp_eq_e;		// Operand 1 == operand 2
   output 	 cmp_lt_e;		// Operand 1 <  operand 2
   output 	 null_objref_e;		// ucode_porta_mux_out == zero
   output	 iu_rs1_e_0_l;		// fast signal for ucode. rs1_bypass[0]
   output	 iu_rs2_e_0_l;		// fast signal for ucode. rs2_bypass[0]
   output [31:0] iu_addr_e;		// Address adder output, E stage
   output	 iu_addr_e_2;
   output	 iu_addr_e_31;
   output [31:0] iu_br_pc_e;		// Branch target PC to ICU
   output [31:0] iu_data_e;		// Data corresponding to iu_addr_e
   output [31:0] wr_optop_data_e;	// Data for write optop operation
   output [31:0] iu_addr_c;		// Address adder output, C stage
   output [31:0] alu_out_w;		// Main alu output, W stage
   output [31:0] dcu_data_w;		// Output of dcu_hold_reg
   output 	 load_buffer_mux_out_0;	// lockbit used in monitorenter	

   // E stage control signals
   input	 ucode_active;
   input 	 shift_dir_e;		// Shifter: 0: left, 1: right
   input [5:0] 	 shift_count_e;		// Shifter: # of bits to shift
   input 	 sign_e;		/* Sign extension for shifter
					 * and converter
					 */
   /******************** monitor caching change  *****************/
   input	monitorenter_e;
   

   // C stage control signals
   input 	 iu_data_vld;		// From IU input
   input 	 sc_data_vld;		// Indicate dcu_data_c is from S$ miss
   input 	 hold_e;		// Hold signal from pipe control
   input 	 hold_c;		// Hold signal from pipe control

   // W stage control signal
   input  [1:0]	 dcu_data_reg_we;	/* Eanbled if:
					 * 1. IDLE and valid
					 * 2. ST1 and ~hold and valid
					 * 3. ST2 and ~hold
					 */
   // Mux select signals.  0 is the default selection.
   input  	 rs1_mux_sel_din;	/* 1: scache_miss_data_e */
   input  	 forward_c_sel_din;	/* 1: scache_miss_data_e
					 */
   input	ex_adder2_src1_mux_sel; 

   input [4:0] 	 rs1_bypass_mux_sel;	/* 16: rs1_hold_reg
					   8: forward_data_w1
					 * 4: forward_data_w
					 * 2: forward_data_c
					 * 1: rs1_mux_out
					 */
   input [4:0] 	 rs2_bypass_mux_sel;	/* 16:rs2_hold_reg
					   8: forward_data_w1
					 * 4: forward_data_w
					 * 2: forward_data_c
					 * 1: ru_rs2_e
					 */
   input [2:0] 	 cmp_mux_sel;		/* 2: Rs1, zero
					 * 1: Rs2, Rs1
					 */
   input [1:0] 	 adder_src1_mux_sel;	/* 2: ~Rs1
					 * 1: Rs1
					 */
   input [2:0] 	 adder_src2_mux_sel;	/* 4: zero
					 * 2: ~Rs2
					 * 1: Rs2
					 */
   input [4:0] 	 shifter_src1_mux_sel;	/* 16: rs2_bypass_mux_out_d1
					 *  8: {32{rs2_bypass_mux_out[31]}}
					 *  4: rs2_bypass_mux_out
					 *  2: rs1_bypass_mux_out
					 *  1: zero
					 */
   input [2:0] 	 shifter_src2_mux_sel;	/* 4: rs2_bypass_mux_out_d1
					 * 2: rs2_bypass_mux_out
					 * 1: zero
					 */
   input 	 shifter_word_sel;	/* 1: MSW
					 * 0: LSW
					 */
   input [4:0] 	 bit_mux_sel;		/* 16: rs1_bypass_mux_out with bit 31
					 *     flipped for fneg and dneg
					 *  8: pc_r
					 *  4: xor
					 *  2: and
					 *  1: or
					 */
   input [4:0] 	 cvt_mux_sel;		/* 16: i2s
					 *  8: i2c
					 *  4: i2b
					 *  2: i2l
					 *  1: sethi
					 */
   input [1:0] 	 bit_cvt_mux_sel;	/* 2: bit operator output
					 * 1: converter output
					 */
   input [2:0] 	 constant_mux_sel;	/* 4:  1 -- gt
					 * 2:  0 -- eq
					 * 1: -1 -- lt
					 */
   input [7:0] 	 alu_out_mux_sel;	/* 128: constant_mux_out
					 *  64: imdr_data_e
					 *  32: reg_mux_out
					 *  16: bit_cvt_mux_out
					 *   8: shifter output
					 *   4: adder output
					 *   2: ucode_portc_e
					 *   1: bypass
					 */
   input [4:0] 	 offset_mux_sel;	/* 16: 32-bit offset for ld/st index
					 *  8: 16-bit offset for ld/st index
					 *  4: 8-bit offset for ld/st index
					 *  2: opcode_e (32-bit offset)
					 *  1: offset16 (16-bit offset)
					 */

   input [3:0] 	 adder2_src2_mux_sel;	/* 8: zero
					 * 4: offset_e
					 * 2: u_m_adder_portb_e
					 * 1: ~u_m_adder_portb_e
					 */
   input [2:0] 	 iu_br_pc_mux_sel;	/* 4: ucode_porta_e
					 * 2: pc_c
					 * 1: adder2 output
					 */
   input [1:0] 	 iu_data_mux_sel;	/* 2: ucode_portc_e
					 * 1: rs2_bypass_mux_out
					 */
   input [1:0] 	 wr_optop_mux_sel;	/* 2: vars_out
					 * 1: ucode_porta_mux_out
					 */
   input [2:0] 	 load_data_c_mux_sel;	/* 4: icu_diag_data_c
					 * 2: dcu_diag_data_c
					 * 1: dcu_data_c 
					 */

   input [1:0]	 fpu_mux_sel;		// 2: fpu_data
					// 1: alu data

   input [1:0] 	 forward_w_sel_din;/* 2: output of dcu_hold register
					 * 1: output of W stage alu_out
					 *    register
					 */
   input [1:0] 	 load_buffer_mux_sel;	/* 2: load_buffer_reg_out (in state 2)
					 * 1: dcu_data_c (otherwise)
					 */

   input [31:0] iu_optop_e;		// optop
   input [31:0] ucode_reg_data;		// ucode arch reg data
   input [3:0]	ucode_madder_sel2;	// u_f19[3] and u_f19[1:0] frpom ucode
   input [1:0]	ucode_alu_a_sel;	// u_f23[1:0] from ucode
   input [2:0]	ucode_alu_b_sel;	// u_f07[2:0] from ucode
   input [1:0]	ucode_porta_sel;	// u_f00[1:0] from ucode
   input	ucode_busy_e;		// Ucode is busy
   input	sc_dcache_req;		// dcache req on a smiss

   // Miscellaneous global signals
   input	 reset_l;		// Global Reset signal
   input 	 clk;			// Global clock signal
   input 	 sm;			// Scan enable
   input 	 sin;			// Scan input of this module
   output 	 so;			// Scan output of this module

   
   // Internal buses
   wire [31:0] 	scache_miss_data_e;	// Operand 1 value when S$ miss
   wire [31:0] 	rs1_bypass_hold;	/* Latched rs1_bypass_mux_out for the
					 * "bubbled" forward case
					 */
   wire [31:0] 	rs2_bypass_hold;	/* Latched rs2_bypass_mux_out for the
					 * "bubbled" forward case
					 */
   wire [31:0] 	rs1_mux_out;		// Output of rs1_mux
   wire [31:0] 	ucode_alu_a_mux_out;	// Output of ucode_alu_a_mux
   wire [31:0] 	ucode_alu_b_mux_out;	// Output of ucode_alu_b_mux
   wire [31:0] 	cmp_src1_mux_out;	// Output of cmp_src1_mux
   wire [31:0] 	cmp_src2_mux_out;	// Output of cmp_src2_mux
   wire [31:0] 	adder_src1_mux_out;	// Output of adder_src1_mux
   wire [31:0] 	adder_src2_mux_out;	// Output of adder_src2_mux
   wire [31:0] 	rs2_bypass_mux_out_d1;	// Latched version of rs2_bypass_mux_out
   wire [31:0] 	shifter_src1_mux_out;	// Output of shifter_msb_mux
   wire [31:0] 	shifter_src2_mux_out;	// Output of shifter_lsb_mux
   wire [31:0] 	shifter_out;		// Output of 64-bit shifter
   wire [31:0] 	bit_mux_out;		// Output of bit_mux
   wire [31:0] 	cvt_src0;		// Input to converter
   wire [31:0] 	cvt_src1;		// Input to converter
   wire [31:0] 	cvt_src2;		// Input to converter
   wire [31:0] 	cvt_src3;		// Input to converter
   wire [31:0] 	cvt_src4;		// Input to converter
   wire [31:0] 	cvt_mux_out;		// Output of cvt_mux
   wire [31:0] 	bit_cvt_mux_out;	// Output of bit_cvt_mux
   wire [31:0] 	constant_mux_out;	// Output of constant_mux
   wire [31:0] 	alu_out_e;		// Output of alu_out_mux
   wire [31:0] 	alu_out_c;		// Latched version of alu_out_e 
   wire [31:0]  load_data_c;		// Input to the load buffer
   wire [31:0] 	alu_out_c_d1;		// Latched version of alu_out_c 
   wire [31:0] 	alu_out_w;		// Mux output in W stage
   wire [31:0] 	forward_data_c;		// forward_data_c_mux_out
   wire [31:0] 	load_buffer_reg_out;	// Output of the (1st) load_buffer_reg
   wire [31:0] 	load_buffer_mux_out;	// Outptu of the load_buffer_mux
   wire [31:0] 	forward_data_w;		/* forward_data_w_mux_out
					 * Equivalent to iu_data_w
					 */
   wire [31:0] 	forward_data_w1;	// Output of W1 stage register
   wire [31:0] 	opcode_e;		// Latched version of opcode_r
   wire [31:0] 	offset16;		/* Signed extended offset for 16-bit
					 * branch offset
					 */
   wire	[31:0]	rs2_bypass_mux_out_gen;
   wire	[31:0]	rs2_bypass_mux_out_v1;

   wire	[31:0]	rs1_bypass_mux_out_gen;
   wire	[31:0]	rs1_bypass_mux_out_inv;
   wire	[31:0]	rs1_bypass_mux_out_v1;
   wire	[31:0]	rs1_bypass_mux_out_v2;

   wire [31:0] 	alu_out_e_new;		// including fpu data. timing fix
   wire [31:0] 	offset_e;		// Branch offset to adder2
   wire [31:0] 	adder2_src1_mux_out;	// Output of adder2_src1_mux
   wire [31:0] 	adder2_src2_mux_out;	// Output of adder2_src2_mux
   wire 	adder2_carry_out;	// Carry output of dpath_adder2

// All of the following signals are added to improve timing 

   wire [4:0]	rs1_bypass_mux_sel_e;
   wire [4:0]	rs2_bypass_mux_sel_e;
   wire [1:0]	rs1_mux_sel;
   wire		rs1_0_mux_out;
   wire		rs1_0;
   wire		rs2_0;
   wire [31:0] cmp_porta ;
   wire [1:0]  forward_data_c_mux_sel;
   wire [1:0]  forward_data_w_mux_sel;


   wire	[7:0]	adder2_src1_mux_sel;
   wire [3:0]	ucode_alu_a_mux_sel;
   wire [7:0]	ucode_alu_b_mux_sel;
   wire	[2:0]	ucode_porta_mux_sel;
	

   /*****************************
    *				*
    *		E stage		*
    *				*
    *****************************/

   ff_se_32 scache_miss_data_e_reg(.out	   (scache_miss_data_e[31:0]),
				   .din	   (dcu_data_c[31:0]),
				   .enable (iu_data_vld & sc_data_vld),
				   .clk	   (clk)
				   );
   ff_s_32 rs1_bypass_reg(.out (rs1_bypass_hold[31:0]),
			  .din (rs1_bypass_mux_out[31:0]),
			  .clk (clk)
			  );
   ff_s_32 rs2_bypass_reg(.out (rs2_bypass_hold[31:0]),
			  .din (rs2_bypass_mux_out[31:0]),
			  .clk (clk)
			  );
   mux2_32 rs1_mux(.out	(rs1_mux_out[31:0]),
		   .in1	(scache_miss_data_e[31:0]),
		   .in0	(ru_rs1_e[31:0]),
		   .sel	(rs1_mux_sel[1:0])
		   );

// Because of timing reasons, we have moved the mux selects for 
// Rs1 and Rs2 into data path only 

   ff_sr	flop_rs1_mux(.out(rs1_mux_sel[1]),
                    	.din(rs1_mux_sel_din),
                       	.clk(clk),
                       	.reset_l(reset_l));

   assign	rs1_mux_sel[0] = !rs1_mux_sel[1];


   ff_sre_4	flop_rs1_bypass_sel(.out(rs1_bypass_mux_sel_e[3:0]),
			.din(rs1_bypass_mux_sel[3:0]),
                        .clk(clk),
			.enable(!(hold_e & !ucode_active)),
                        .reset_l(reset_l));

   ff_sr	flop_rs1_bypass_sel_4(.out(rs1_bypass_mux_sel_e[4]),
			.din(rs1_bypass_mux_sel[4]),
                        .clk(clk),
                        .reset_l(reset_l));


   mux5_32 rs1_bypass_mux(.out (rs1_bypass_mux_out_gen[31:0]),
			   .in4 (rs1_bypass_hold[31:0]),
			   .in3 (forward_data_c[31:0]),
			   .in2 (forward_data_w[31:0]),
			   .in1 (forward_data_w1[31:0]),
			   .in0 (rs1_mux_out[31:0]),
			   .sel (rs1_bypass_mux_sel_e[4:0])
			   );

// Constructing a buffer tree so as to aid timing 
// rs1_bypass_mux_out is used in non-critical places, while
// rs1_bypass_mux_out_v1 and v2 will be used in timing critical places

buf10_drv_32	buf_rs1 (.out(rs1_bypass_mux_out), .in(rs1_bypass_mux_out_gen) );
inv10_drv_32	inv_rs1_1 (.out(rs1_bypass_mux_out_inv), .in(rs1_bypass_mux_out_gen) );
inv10_drv_32	inv_rs1_2 (.out(rs1_bypass_mux_out_v1), .in(rs1_bypass_mux_out_inv) );
inv10_drv_32	inv_rs1_3 (.out(rs1_bypass_mux_out_v2), .in(rs1_bypass_mux_out_inv) );


// The following two muxes are replicated to generate a dedicated
// rs1 bit 0 to be used in ucode. This will help some
// critical paths

   mux2 rs1_0_mux(.out	(rs1_0_mux_out),
		   .in1	(scache_miss_data_e[0]),
		   .in0	(ru_rs1_e[0]),
		   .sel	(rs1_mux_sel[1:0])
		   );

   mux5 rs1_0_bypass_mux(.out (rs1_0),
			   .in4 (rs1_bypass_hold[0]),
			   .in3 (forward_data_c[0]),
			   .in2 (forward_data_w[0]),
			   .in1 (forward_data_w1[0]),
			   .in0 (rs1_0_mux_out),
			   .sel (rs1_bypass_mux_sel_e[4:0])
			   );

   assign	iu_rs1_e_0_l	= ~rs1_0 ;

// Because of timing reasons, we have moved the mux selects for 
// Rs1 and Rs2 into data path only 

   ff_sre_4	flop_rs2_bypass_sel(.out(rs2_bypass_mux_sel_e[3:0]),
			.din(rs2_bypass_mux_sel[3:0]),
                        .clk(clk),
			.enable(!hold_e),
                        .reset_l(reset_l));

   ff_sr	flop_rs2_bypass_sel_4(.out(rs2_bypass_mux_sel_e[4]),
			.din(rs2_bypass_mux_sel[4]),
                        .clk(clk),
                        .reset_l(reset_l));

   mux5_32 rs2_bypass_mux (.out (rs2_bypass_mux_out_gen[31:0]),
			   .in4 (rs2_bypass_hold[31:0]),
			   .in3 (forward_data_c[31:0]),
			   .in2 (forward_data_w[31:0]),
			   .in1 (forward_data_w1[31:0]),
			   .in0 (ru_rs2_e[31:0]),
			   .sel (rs2_bypass_mux_sel_e[4:0])
			   );

// Inserting a buffer tree for improving timing on RS2 

buf10_drv_32	buf_rs2_1(.out(rs2_bypass_mux_out), .in(rs2_bypass_mux_out_gen) );
buf10_drv_32	buf_rs2_2(.out(rs2_bypass_mux_out_v1), .in(rs2_bypass_mux_out_gen) );

// The following mux is replicated to give a dedicated rs2[0] to 
// ucode to help timing 

   mux5 rs2_0_bypass_mux (.out (rs2_0),
			   .in4 (rs2_bypass_hold[0]),
			   .in3 (forward_data_c[0]),
			   .in2 (forward_data_w[0]),
			   .in1 (forward_data_w1[0]),
			   .in0 (ru_rs2_e[0]),
			   .sel (rs2_bypass_mux_sel_e[4:0])
			   );

   assign	iu_rs2_e_0_l	= ~rs2_0 ;

/***********       iu_alu_a path improvement  ********************/
/****				BEGIN		    	*****/

//   mux2_32 ucode_alu_a_mux(.out	(ucode_alu_a_mux_out[31:0]),
//			   .in1	(iu_alu_a[31:0]),
//			   .in0	(rs1_bypass_mux_out[31:0]),
//			   .sel	(ucode_mux_sel[1:0])
//			   );

   mux4_32 ucode_alu_a_mux(.out	(ucode_alu_a_mux_out[31:0]),
			   .in3	({rs1_bypass_mux_out_v1[28:0],3'b0}),
			   .in2	({rs1_bypass_mux_out_v1[29:0],2'b0}),
			   .in1	(iu_alu_a[31:0]),
			   .in0	(rs1_bypass_mux_out_v1[31:0]),
			   .sel	(ucode_alu_a_mux_sel[3:0])
			   );

// If u_f23[1:0] == 11, select RS1[28:0],3'b0
assign	ucode_alu_a_mux_sel[3] = ucode_alu_a_sel[1] & ucode_alu_a_sel[0];

// If u_f23[1:0] == 10, select RS1[29:0],2'b0
assign	ucode_alu_a_mux_sel[2] = ucode_alu_a_sel[1] & !ucode_alu_a_sel[0];

// If u_f23[1:0] == 01, select iu_alu_a
assign	ucode_alu_a_mux_sel[1] = !ucode_alu_a_sel[1] & ucode_alu_a_sel[0];

// If u_f23[1:0] == 00, select RS1
assign	ucode_alu_a_mux_sel[0] = !ucode_alu_a_sel[1] & !ucode_alu_a_sel[0];

/****				  END				    	*****/
/***********       iu_alu_a path improvement   ********************/

/************    iu_alu_b path improvement      *******************/
/****				BEGIN				    	*****/

//   mux2_32 ucode_alu_b_mux(.out(ucode_alu_b_mux_out[31:0]),
//			   .in1	(iu_alu_b[31:0]),
//			   .in0	(rs2_bypass_mux_out[31:0]),
//			   .sel	(ucode_mux_sel[1:0])
//			   );

   mux8_32 ucode_alu_b_mux(.out	(ucode_alu_b_mux_out[31:0]),
			   .in7	(iu_alu_b[31:0]),
			   .in6	(rs1_bypass_mux_out_v1[31:0]),
			   .in5	(ucode_reg_data[31:0]),
			   .in4	(iu_optop_e[31:0]),
			   .in3	({rs1_bypass_mux_out_v1[31:2],2'b0}),
			   .in2	({ucode_reg_data[31:2],2'b0}),
			   .in1	(32'b0),
			   .in0	(rs2_bypass_mux_out_v1[31:0]),
			   .sel	(ucode_alu_b_mux_sel[7:0])
			   );

// if u_f07[2:0] == 001, select iu_alu_b
assign	ucode_alu_b_mux_sel[7] 	=  !ucode_alu_b_sel[2] & !ucode_alu_b_sel[1] 
					& ucode_alu_b_sel[0];

// if u_f07[2:0] == 010, select RS1
assign	ucode_alu_b_mux_sel[6] 	=  !ucode_alu_b_sel[2] & ucode_alu_b_sel[1] 
					& !ucode_alu_b_sel[0];

// if u_f07[2:0] == 011, select ucode_reg_data
assign	ucode_alu_b_mux_sel[5] 	=  !ucode_alu_b_sel[2] & ucode_alu_b_sel[1] 
					& ucode_alu_b_sel[0];

// if u_f07[2:0] == 100, select iu_optop_e
assign	ucode_alu_b_mux_sel[4] 	=  ucode_alu_b_sel[2] & !ucode_alu_b_sel[1] 
					& !ucode_alu_b_sel[0];

// if u_f07[2:0] == 101, select RS1[31:2],2'b0
assign	ucode_alu_b_mux_sel[3] 	=  ucode_alu_b_sel[2] & !ucode_alu_b_sel[1] 
					& ucode_alu_b_sel[0];

// if u_f07[2:0] == 110, select ucode_reg_data[31:2],2'b0
assign	ucode_alu_b_mux_sel[2] 	=  ucode_alu_b_sel[2] & ucode_alu_b_sel[1] 
					& !ucode_alu_b_sel[0];

// if u_f07[2:0] == 111, select 32'b0
assign	ucode_alu_b_mux_sel[1] 	=  ucode_alu_b_sel[2] & ucode_alu_b_sel[1] 
					& ucode_alu_b_sel[0];

// if u_f07[2:0] == 000, select RS2
assign	ucode_alu_b_mux_sel[0] 	=  !ucode_alu_b_sel[2] & !ucode_alu_b_sel[1] 
					& !ucode_alu_b_sel[0];

/****				 END				    	*****/
/*******       iu_alu_b path improvement        *******************/


/*******  ucode_porta_mux_out path improvement  *******************/
/****				BEGIN				    	*****/

    mux2_32 ucode_porta_mux(.out	(ucode_porta_mux_out[31:0]),
 			   .in1	(ucode_porta_e[31:0]),
 			   .in0	(rs1_bypass_mux_out_v1[31:0]),
 			   .sel	({ucode_busy_e,~ucode_busy_e})
 			   );

   mux3_32 cmp_porta_mux(.out	(cmp_porta[31:0]),
			   .in2	(rs2_bypass_mux_out_v1[31:0]),
			   .in1	(ucode_porta_e[31:0]),
			   .in0	(rs1_bypass_mux_out_v2[31:0]),
			   .sel	(ucode_porta_mux_sel[2:0]));

// Select RS2 whenever u_f00[1:0] is 11
assign	ucode_porta_mux_sel[2] 	=  ucode_porta_sel[1] & ucode_porta_sel[0];

// Select ucode_porta_e whenever u_f00[1:0] is 10
assign	ucode_porta_mux_sel[1] 	=  ucode_porta_sel[1] & !ucode_porta_sel[0];

// Select RS1 otherwise
assign	ucode_porta_mux_sel[0] 	=  !ucode_porta_sel[1];

/****				 END				    	*****/
/*******  ucode_porta_mux_out path improvement  *******************/

// Data to be written onto SMU's version of sbase in case 
// of wr_sbase opcodes. This is done to improve timing on 
// smu_stall signal 

assign 	iu_data_in = ucode_porta_mux_out;

   compare_zero_32 objref_cmp(.in(cmp_porta[31:0]),
			.out(null_objref_e)
			 );

   // The signed/unsigned 32-bit comparator
   mux3_32 cmp_src1_mux(.out (cmp_src1_mux_out[31:0]),
			.in2 (rs1_bypass_mux_out_v2[31:0]),
			.in1 ({2'b0,rs2_bypass_mux_out_v1[29:2],2'b0}),
			.in0 (rs2_bypass_mux_out_v1[31:0]),
			.sel (cmp_mux_sel[2:0])
			);
   mux3_32 cmp_src2_mux(.out (cmp_src2_mux_out[31:0]),
			.in2 (32'h0),
			.in1 ({2'b0,rs1_bypass_mux_out_v2[29:2],2'b0}),
			.in0 (rs1_bypass_mux_out_v2[31:0]),
			.sel (cmp_mux_sel[2:0])
			);
   cmp_32 dpath_cmp(.gt	  (cmp_gt_e),
		    .eq	  (cmp_eq_e),
		    .lt	  (cmp_lt_e),
		    .in1  (cmp_src1_mux_out[31:0]),
		    .in2  (cmp_src2_mux_out[31:0]),
		    .sign (sign_e)
		    );

   // The 32-bit adder
   mux2_32 adder_src1_mux(.out	(adder_src1_mux_out[31:0]),
			  .in1	(~ucode_alu_a_mux_out[31:0]),
			  .in0	(ucode_alu_a_mux_out[31:0]),
			  .sel	(adder_src1_mux_sel[1:0])
			  );
   mux3_32 adder_src2_mux(.out	(adder_src2_mux_out[31:0]),
			  .in2	(32'h0),
			  .in1	(~ucode_alu_b_mux_out[31:0]),
			  .in0	(ucode_alu_b_mux_out[31:0]),
			  .sel	(adder_src2_mux_sel[2:0])
			  );
   cla_adder_32 dpath_adder(.cout (carry_out_e),
			    .sum  (adder_out_e[31:0]),
			    .in1  (adder_src1_mux_out[31:0]),
			    .in2  (adder_src2_mux_out[31:0]),
			    .cin  (carry_in_e)
			    );

   // The 64-bit shifter
   /* 
    * We need to latch rs2_bypass_mux_out for long shift so the second cycle
    * can use the correct operand (LSW for lshl and MSW for lshr and lushr).
    */
   ff_se_32 rs2_bypass_mux_out_reg(.out    (rs2_bypass_mux_out_d1[31:0]),
				   .din    (rs2_bypass_mux_out[31:0]),
				   .enable (~hold_e),
				   .clk    (clk)
				   );
   mux5_32 shifter_src1_mux(.out (shifter_src1_mux_out[31:0]),
			    .in4 (rs2_bypass_mux_out_d1[31:0]),
			    .in3 ({32{rs2_bypass_mux_out[31]}}),
			    .in2 (rs2_bypass_mux_out[31:0]),
			    .in1 (rs1_bypass_mux_out[31:0]),
			    .in0 (32'h0),
			    .sel (shifter_src1_mux_sel[4:0])
			    );
   mux3_32 shifter_src2_mux(.out (shifter_src2_mux_out[31:0]),
			    .in2 (rs2_bypass_mux_out_d1[31:0]),
			    .in1 (rs2_bypass_mux_out[31:0]),
			    .in0 (32'h0),
			    .sel (shifter_src2_mux_sel[2:0])
			    );
   shift_64 dpath_shifter(.out	    (shifter_out[31:0]),
			  .msw_in   (shifter_src1_mux_out[31:0]),
			  .lsw_in   (shifter_src2_mux_out[31:0]),
			  .count    (shift_count_e[5:0]),
			  .dir	    (shift_dir_e),
			  .arith    (sign_e),
			  .word_sel (shifter_word_sel)
			  );

   // AND, OR, XOR
   mux5_32 bit_mux(.out	(bit_mux_out[31:0]),
		   .in4 ({~rs1_bypass_mux_out[31], rs1_bypass_mux_out[30:0]}),
		   .in3	(pc_r[31:0]),
		   .in2	(rs1_bypass_mux_out[31:0] ^ rs2_bypass_mux_out[31:0]),
		   .in1	(rs1_bypass_mux_out[31:0] & rs2_bypass_mux_out[31:0]),
		   .in0	(rs1_bypass_mux_out[31:0] | rs2_bypass_mux_out[31:0]),
		   .sel	(bit_mux_sel[4:0])
		   );

   // Converter
   // sethi
   assign cvt_src0 = {offset_e[15:0], rs1_bypass_mux_out[15:0]};
   // i2l
   assign cvt_src1 = {32{rs1_bypass_mux_out[31]}};
   // i2b
   assign cvt_src2 = {{24{rs1_bypass_mux_out[7]}}, rs1_bypass_mux_out[7:0]};
   // i2c
   assign cvt_src3 = {16'h0, rs1_bypass_mux_out[15:0]};
   // i2s
   assign cvt_src4 = {{16{rs1_bypass_mux_out[15]}}, rs1_bypass_mux_out[15:0]};
   mux5_32 cvt_mux(.out	(cvt_mux_out[31:0]),
		   .in4	(cvt_src4[31:0]),
		   .in3	(cvt_src3[31:0]),
		   .in2	(cvt_src2[31:0]),
		   .in1	(cvt_src1[31:0]),
		   .in0	(cvt_src0[31:0]),
		   .sel	(cvt_mux_sel[4:0])
		   );
   mux2_32 bit_cvt_mux(.out (bit_cvt_mux_out[31:0]),
		       .in1 (bit_mux_out[31:0]),
		       .in0 (cvt_mux_out[31:0]),
		       .sel (bit_cvt_mux_sel[1:0])
		       );
   mux3_32 constant_mux(.out (constant_mux_out[31:0]),
			.in2 (32'h1),
			.in1 (32'h0),
			.in0 (32'hffffffff),
			.sel (constant_mux_sel[2:0])
			);
   mux8_32 alu_out_mux(.out (alu_out_e[31:0]),
		       .in7 (constant_mux_out[31:0]),
		       .in6 (imdr_data_e[31:0]),
		       .in5 (reg_data_e[31:0]),
		       .in4 (bit_cvt_mux_out[31:0]),
		       .in3 (shifter_out[31:0]),
		       .in2 (adder_out_e[31:0]),
		       .in1 (ucode_portc_e[31:0]),
		       .in0 (rs1_bypass_mux_out[31:0]),
		       .sel (alu_out_mux_sel[7:0])
		       );
   ff_se_32 opcode_r_reg(.out	 (opcode_e[31:0]),
			 .din	 (opcode_r[31:0]),
			 .enable (~hold_e),
			 .clk	 (clk)
			 );
   assign offset16 = {{16{opcode_e[31]}}, opcode_e[31:16]};
   mux5_32 offset_mux(.out (offset_e[31:0]),
		      .in4 ({{22{opcode_e[23]}}, opcode_e[23:16], 2'b00}),
		      .in3 ({{23{opcode_e[23]}}, opcode_e[23:16], 1'b0}),
		      .in2 ({{24{opcode_e[23]}}, opcode_e[23:16]}),
		      .in1 (opcode_e[31:0]),
		      .in0 (offset16[31:0]),
		      .sel (offset_mux_sel[4:0])
		      );
/*********** m_adder_port_a path improvement   ********************/
/****				BEGIN				    	*****/

// Replacing the following mux to improve timing 

//    mux4_32 adder2_src1_mux(.out	(adder2_src1_mux_out[31:0]),
// 			   .in3	(u_m_adder_porta_e[31:0]),
// 			   .in2	(rs1_bypass_mux_out[31:0]),
// 			   .in1	(scache_miss_addr_e[31:0]),
// 			   .in0	(pc_e[31:0]),
// 			   .sel	(adder2_src1_mux_sel[3:0])
// 			   );


mux8_32 adder2_src1_mux(.out(adder2_src1_mux_out[31:0]),
                        .in7(scache_miss_addr_e[31:0]),
                        .in6(rs1_bypass_mux_out_v2[31:0]),
                        .in5({rs1_bypass_mux_out_v2[31:2],2'b0}),
                        .in4({rs2_bypass_mux_out_v1[31:2],2'b0}),
                        .in3(ucode_reg_data[31:0]),
                        .in2({ucode_reg_data[31:6],2'b10,ucode_reg_data[3:0]}),
                        .in1(u_m_adder_porta_e[31:0]),
                        .in0(pc_e),
                        .sel (adder2_src1_mux_sel[7:0]) );
 

assign  adder2_src1_mux_sel[7]  = sc_dcache_req;
 
// Whenever f21[1] is set and f19[3:0] are 1001 or whenever
// ex_adder2_src1_mux_sel is  high pick RS1
 
assign  adder2_src1_mux_sel[6]  = ( (ucode_madder_sel2[3] & !ucode_madder_sel2[2] &
                                    !ucode_madder_sel2[1] & ucode_madder_sel2[0]) |
                                    (ex_adder2_src1_mux_sel &! ucode_busy_e) );
 
// Whenever f21[1] is set and f19[3:0] are 1010, pick RS1[31:2],2'b0

/******************** monitor caching change  *****************/
// pick RS1[31:2],2'b0 when checking lockbit in monitorenter
 
assign  adder2_src1_mux_sel[5]  = (ucode_madder_sel2[3] & !ucode_madder_sel2[2] &
                                        ucode_madder_sel2[1] & !ucode_madder_sel2[0])
                                  | monitorenter_e;
 
// Whenever f19[3:0] are 1011, pick RS2[31:2],2'b0
 
assign  adder2_src1_mux_sel[4]  = ucode_madder_sel2[3] & !ucode_madder_sel2[2] &
                                        ucode_madder_sel2[1] & ucode_madder_sel2[0];
 
// Whenever f19[3:0] are 0110, pick ucode_reg_data
 
assign  adder2_src1_mux_sel[3]  = !ucode_madder_sel2[3] & ucode_madder_sel2[2] &
                                        ucode_madder_sel2[1] & !ucode_madder_sel2[0];
 
// Whenever f19[3:0] are 0111, pick ucode_reg_data[31:6],2'b10,[3:0]
 
assign  adder2_src1_mux_sel[2]  = !ucode_madder_sel2[3] & ucode_madder_sel2[2] &
                                        ucode_madder_sel2[1] & ucode_madder_sel2[0];
 
// Whenever ucode_busy_e is set, pick u_m_adder_porta_e
 
assign  adder2_src1_mux_sel[1]  = ucode_busy_e ;

// Else pick pc_e
 
assign  adder2_src1_mux_sel[0]  = !ucode_busy_e ;
 

/****				  END				    	*****/
/*********** m_adder_port_a path improvement 8  ********************/




   mux4_32 adder2_src2_mux(.out	(adder2_src2_mux_out[31:0]),
			   .in3	(32'h0),
			   .in2	(offset_e[31:0]),
			   .in1 (u_m_adder_portb_e[31:0]),
                           .in0 (~u_m_adder_portb_e[31:0]),
			   .sel	(adder2_src2_mux_sel[3:0])
			   );

   /*
    * For extended ld/st, bit 31 is used for GC and bit 30 is used for LE.
    * But for diagnostis rd/wr, bit 31 is used to select which way of the
    * cache to access.  Need to select the right bit to go into DCU.
    */
   cla_adder_32 dpath_adder2(.cout (adder2_carry_out),
			     .sum  (iu_addr_e[31:0]),
			     .in1  (adder2_src1_mux_out[31:0]),
			     .in2  (adder2_src2_mux_out[31:0]),
			     .cin  (adder2_carry_in)
			     );
   assign iu_addr_e_2 =  rs1_bypass_mux_out[2] ;
   assign iu_addr_e_31 = rs1_bypass_mux_out[31] ;

   /*
    * iu_addr_c is used to check mem_protection_error and data breakpoints.
    * Bit 31 (GC) and bit 30 (LE) should be masked off so they don't get
    * compared.
    */
   assign iu_addr_c[31:30] = 2'b00;
   ff_se_30 iu_addr_e_reg(.out	  (iu_addr_c[29:0]),
			  .din	  (iu_addr_e[29:0]),
			  .enable (~hold_c),
			  .clk	  (clk)
			  );
   mux3_32 iu_br_pc_mux(.out (iu_br_pc_e[31:0]),
			.in2 (ucode_porta_e[31:0]),
			.in1 (pc_c[31:0]),
			.in0 (iu_addr_e[31:0]),
			.sel (iu_br_pc_mux_sel[2:0])
			);
   mux2_32 iu_data_mux(.out (iu_data_e[31:0]),
		       .in1 (ucode_portc_e[31:0]),
		       .in0 (rs2_bypass_mux_out_v1[31:0]),
		       .sel (iu_data_mux_sel[1:0])
		       );
   mux2_32 wr_optop_mux(.out (wr_optop_data_e[31:0]),
			.in1 (vars_out[31:0]),
			.in0 (ucode_porta_mux_out[31:0]),
			.sel (wr_optop_mux_sel[1:0])
			);

    mux2_32 fpu_data_mux(.out (alu_out_e_new[31:0]),
                        .in1 (fpu_data_e[31:0]),
                        .in0 (alu_out_e[31:0]),
                        .sel (fpu_mux_sel[1:0])
                        );

   ff_se_32 alu_out_c_reg(.out	  (alu_out_c[31:0]),
			  .din	  (alu_out_e_new[31:0]),
			  .enable (~hold_c | ~fpu_hold&fpu_mux_sel[1]),
			  .clk	  (clk)
			  );

   /*****************************
    *				*
    *		C stage		*
    *				*
    *****************************/

// Because of timing reasons, we have moved the mux selects for 
// Rs1 and Rs2 into data path only 

   mux2_32 forward_data_c_mux(.out (forward_data_c[31:0]),
			      .in1 (dcu_data_w[31:0]),
			      .in0 (alu_out_c[31:0]),
			      .sel (forward_data_c_mux_sel[1:0])
			      );

ff_sr        for_c_flop( .out(forward_data_c_mux_sel[1]),
                                .din(forward_c_sel_din),
                                .clk(clk),
                                .reset_l(reset_l));

assign	forward_data_c_mux_sel[0] = !forward_data_c_mux_sel[1];



   ff_se_32 alu_out_c_d1_reg(.out (alu_out_c_d1[31:0]),
			  .din	  (alu_out_c[31:0]),
			  .enable (~hold_c),
			  .clk	  (clk)
			  );

   mux3_32  load_data_c_mux (.out(load_data_c[31:0]),
                             .in2 (icu_diag_data_c[31:0]),
                             .in1 (dcu_diag_data_c[31:0]),
                             .in0 (dcu_data_c[31:0]),
                             .sel (load_data_c_mux_sel[2:0]));

   /*
    * The following logic handles this problem:
    *
    * E stage | C stage | W stage
    *  ucode  |   LD1   |   LD2
    *
    * ucode in E wants to use the result of LD2 (through forwarding), but LD1
    * will overwrite the register with its result.
    */

   ff_se_32 load_buffer_reg(.out    (load_buffer_reg_out[31:0]),
			    .din    (load_data_c[31:0]),
			    .enable (dcu_data_reg_we[1]),
			    .clk    (clk)
			    );
   mux2_32 load_buffer_mux(.out	(load_buffer_mux_out[31:0]),
			   .in1	(load_buffer_reg_out[31:0]),
			   .in0	(load_data_c[31:0]),
			   .sel (load_buffer_mux_sel[1:0])
			   );
   ff_se_32 dcu_data_reg(.out	 (dcu_data_w[31:0]),
			 .din	 (load_buffer_mux_out[31:0]),
			 .enable (dcu_data_reg_we[0]),
			 .clk	 (clk)
			 );
			 
// output to ex_ctl as the lockbit used in monitorenter

   assign load_buffer_mux_out_0 = load_buffer_mux_out[0];	
	
Next12
HierarchyFilesModulesSignalsTasksFunctionsHelp

This page: Created:Wed Mar 24 09:44:11 1999
From: /import/jet-pj2-sim/rahim/picoJava-II/design/iu/ex/rtl/ex_dpath.v

Verilog converted to html by v2html 5.0 (written by Costas Calamvokis).Help