LoongArch 五级流水线实现

2023-10-29 16:20

本文主要是介绍LoongArch 五级流水线实现,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

在单周期 CPU 的基础上,将数据通路拆分为取指(IF)、译码(ID)、执行(EXE)、访存(MEM)、写回(WB)五级流水线。

1. mycpu_top.v(注:下方贴出的代码实际是 id_stage 模块,与第 3 节的内容相同;原文没有给出顶层模块 mycpu_top 的代码)
`include "mycpu.h"

// Instruction-decode (ID) stage of the five-stage LoongArch pipeline.
//
// NOTE(review): in the article this listing is printed under the heading
// "mycpu_top.v", but the code defines id_stage; the top-level module itself
// is not part of the article.
//
// The stage latches {inst, pc} from IF, decodes the instruction, reads the
// register file, resolves branches, and packs the control/operand bundle for
// EXE. It never stalls on its own (ds_ready_go == ds_valid) and reads
// operands straight from the register file: no forwarding or interlock is
// implemented here.
module id_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin
    input                          es_allowin    ,
    output                         ds_allowin    ,
    // from fs
    input                          fs_to_ds_valid,
    input  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus  ,
    // to es
    output                         ds_to_es_valid,
    output [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to fs
    output [`BR_BUS_WD       -1:0] br_bus        ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus
);

// ---------------------------------------------------------------------------
// Pipeline handshake state and latched IF->ID bus
// ---------------------------------------------------------------------------
reg          ds_valid;
wire         ds_ready_go;

reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
wire [31:0] ds_inst;
wire [31:0] ds_pc;
assign {ds_inst,
        ds_pc
       } = fs_to_ds_bus_r;

// ---------------------------------------------------------------------------
// Write-back port coming from the WB stage
// ---------------------------------------------------------------------------
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

// ---------------------------------------------------------------------------
// Decoded control signals and operands
// ---------------------------------------------------------------------------
wire        br_taken;
wire [31:0] br_target;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire        dst_is_r1;
wire [ 4:0] dest;
wire [31:0] imm;
wire [31:0] rj_value;
wire [31:0] rkd_value;
wire        rj_eq_rd;
wire [31:0] br_offs;
wire [31:0] jirl_offs;

wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

// ---------------------------------------------------------------------------
// Buses leaving this stage
// ---------------------------------------------------------------------------
assign br_bus       = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

// ---------------------------------------------------------------------------
// Handshake: a valid instruction is always ready to move on
// ---------------------------------------------------------------------------
assign ds_ready_go    = ds_valid;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        ds_valid <= fs_to_ds_valid;
    end
end

// Capture the IF bus only when a valid instruction is actually accepted.
always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// ---------------------------------------------------------------------------
// Instruction field extraction
// ---------------------------------------------------------------------------
assign op_31_26  = ds_inst[31:26];
assign op_25_22  = ds_inst[25:22];
assign op_21_20  = ds_inst[21:20];
assign op_19_15  = ds_inst[19:15];

assign rd   = ds_inst[ 4: 0];
assign rj   = ds_inst[ 9: 5];
assign rk   = ds_inst[14:10];

assign i12  = ds_inst[21:10];
assign i20  = ds_inst[24: 5];
assign i16  = ds_inst[25:10];
assign i26  = {ds_inst[ 9: 0], ds_inst[25:10]};

// One-hot decoders for each opcode field (modules defined outside this file).
decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));

// ---------------------------------------------------------------------------
// Instruction recognition
// ---------------------------------------------------------------------------
assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];

// ---------------------------------------------------------------------------
// ALU operation select (one bit per operation)
// ---------------------------------------------------------------------------
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

// ---------------------------------------------------------------------------
// Immediate generation
// ---------------------------------------------------------------------------
assign need_si20  =  inst_lu12i_w;
assign need_si26  =  inst_b | inst_bl;
assign src2_is_4  =  inst_jirl | inst_bl;  // link instructions compute pc + 4

// Everything that is neither a link instruction nor lu12i.w (i.e. the ui5 and
// si12 forms) takes the sign-extended 12-bit field.
assign imm = src2_is_4 ? 32'h4                      :
             need_si20 ? {i20[19:0], 12'b0}         :
                         {{20{i12[11]}}, i12[11:0]} ;

assign br_offs   = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                               {{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

// ---------------------------------------------------------------------------
// Control signals sent to later stages
// ---------------------------------------------------------------------------
assign load_op       = inst_ld_w;

// beq / bne / st.w read rd as their second source register instead of rk.
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w |
                       inst_srli_w |
                       inst_srai_w |
                       inst_addi_w |
                       inst_ld_w   |
                       inst_st_w   |
                       inst_lu12i_w|
                       inst_jirl   |
                       inst_bl     ;

assign dst_is_r1     = inst_bl;

// Whether the instruction writes a general-purpose register.
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we        = inst_st_w;

// bl always links into r1; every other writer uses the rd field.
assign dest          = dst_is_r1 ? 5'd01 : rd;

// ---------------------------------------------------------------------------
// Register file read
// ---------------------------------------------------------------------------
assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;

regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;

// ---------------------------------------------------------------------------
// Branch resolution
// ---------------------------------------------------------------------------
assign rj_eq_rd = (rj_value == rkd_value);

// NOTE(review): br_bus carries only {taken, target}; nothing in this module
// invalidates an instruction already fetched behind a taken branch. Confirm
// how the IF stage or the test program deals with that instruction.
assign br_taken = (   inst_beq  &&  rj_eq_rd
                   || inst_bne  && !rj_eq_rd
                   || inst_jirl
                   || inst_bl
                   || inst_b
                  ) && ds_valid;

assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);

endmodule
2. IF_stage.v
`include "mycpu.h"

// Instruction-fetch (IF) stage of the five-stage LoongArch pipeline.
//
// A "pre-IF" section computes the next PC (branch target from ID, otherwise
// fs_pc + 4) and drives the instruction SRAM with it. The IF section registers
// that PC and forwards {instruction, pc} to ID. The instruction word is taken
// directly from inst_sram_rdata.
module if_stage(
    input                          clk            ,
    input                          reset          ,
    // allowin
    input                          ds_allowin     ,
    // branch bus from ID: {br_taken, br_target}
    input  [`BR_BUS_WD       -1:0] br_bus         ,
    // to ds
    output                         fs_to_ds_valid ,
    output [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus   ,
    // inst sram interface
    output        inst_sram_en   ,
    output [ 3:0] inst_sram_we   ,
    output [31:0] inst_sram_addr ,
    output [31:0] inst_sram_wdata,
    input  [31:0] inst_sram_rdata
);

reg         fs_valid;      // this stage currently holds a valid instruction
wire        fs_ready_go;   // this stage has finished its work
wire        fs_allowin;    // this stage can accept a new fetch
wire        to_fs_valid;   // pre-IF request is valid (high whenever not in reset)

wire [31:0] seq_pc;
wire [31:0] nextpc;

wire        br_taken;
wire [31:0] br_target;
assign {br_taken, br_target} = br_bus;

wire [31:0] fs_inst;
reg  [31:0] fs_pc;
assign fs_to_ds_bus = {fs_inst,
                       fs_pc
                      };

// ---------------------------------------------------------------------------
// pre-IF stage
// ---------------------------------------------------------------------------
assign to_fs_valid  = ~reset;
assign seq_pc       = fs_pc + 3'h4;
assign nextpc       = br_taken ? br_target : seq_pc;

// ---------------------------------------------------------------------------
// IF stage
// ---------------------------------------------------------------------------
assign fs_ready_go    = 1'b1;
assign fs_allowin     = !fs_valid || (fs_ready_go && ds_allowin);
assign fs_to_ds_valid =  fs_valid && fs_ready_go;

always @(posedge clk) begin
    if (reset) begin
        fs_valid <= 1'b0;
    end
    else if (fs_allowin) begin
        fs_valid <= to_fs_valid;
    end

    if (reset) begin
        fs_pc <= 32'h1bfffffc;  // trick: to make nextpc be 0x1c000000 during reset
    end
    else if (to_fs_valid && fs_allowin) begin
        fs_pc <= nextpc;
    end
end

// ---------------------------------------------------------------------------
// Instruction SRAM: read-only, addressed with the *next* PC
// ---------------------------------------------------------------------------
assign inst_sram_en    = to_fs_valid && fs_allowin;
assign inst_sram_we    = 4'h0;
assign inst_sram_addr  = nextpc;
assign inst_sram_wdata = 32'b0;

assign fs_inst         = inst_sram_rdata;

endmodule
3. ID_stage
`include "mycpu.h"

// Instruction-decode (ID) stage of the five-stage LoongArch pipeline.
//
// The stage latches {inst, pc} from IF, decodes the instruction, reads the
// register file, resolves branches, and packs the control/operand bundle for
// EXE. It never stalls on its own (ds_ready_go == ds_valid) and reads
// operands straight from the register file: no forwarding or interlock is
// implemented here.
module id_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin
    input                          es_allowin    ,
    output                         ds_allowin    ,
    // from fs
    input                          fs_to_ds_valid,
    input  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus  ,
    // to es
    output                         ds_to_es_valid,
    output [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to fs
    output [`BR_BUS_WD       -1:0] br_bus        ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus
);

// ---------------------------------------------------------------------------
// Pipeline handshake state and latched IF->ID bus
// ---------------------------------------------------------------------------
reg          ds_valid;
wire         ds_ready_go;

reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;
wire [31:0] ds_inst;
wire [31:0] ds_pc;
assign {ds_inst,
        ds_pc
       } = fs_to_ds_bus_r;

// ---------------------------------------------------------------------------
// Write-back port coming from the WB stage
// ---------------------------------------------------------------------------
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

// ---------------------------------------------------------------------------
// Decoded control signals and operands
// ---------------------------------------------------------------------------
wire        br_taken;
wire [31:0] br_target;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire        dst_is_r1;
wire [ 4:0] dest;
wire [31:0] imm;
wire [31:0] rj_value;
wire [31:0] rkd_value;
wire        rj_eq_rd;
wire [31:0] br_offs;
wire [31:0] jirl_offs;

wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

// ---------------------------------------------------------------------------
// Buses leaving this stage
// ---------------------------------------------------------------------------
assign br_bus       = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

// ---------------------------------------------------------------------------
// Handshake: a valid instruction is always ready to move on
// ---------------------------------------------------------------------------
assign ds_ready_go    = ds_valid;
assign ds_allowin     = !ds_valid || ds_ready_go && es_allowin;
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        ds_valid <= fs_to_ds_valid;
    end
end

// Capture the IF bus only when a valid instruction is actually accepted.
always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// ---------------------------------------------------------------------------
// Instruction field extraction
// ---------------------------------------------------------------------------
assign op_31_26  = ds_inst[31:26];
assign op_25_22  = ds_inst[25:22];
assign op_21_20  = ds_inst[21:20];
assign op_19_15  = ds_inst[19:15];

assign rd   = ds_inst[ 4: 0];
assign rj   = ds_inst[ 9: 5];
assign rk   = ds_inst[14:10];

assign i12  = ds_inst[21:10];
assign i20  = ds_inst[24: 5];
assign i16  = ds_inst[25:10];
assign i26  = {ds_inst[ 9: 0], ds_inst[25:10]};

// One-hot decoders for each opcode field (modules defined outside this file).
decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d ));
decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d ));
decoder_2_4  u_dec2(.in(op_21_20 ), .out(op_21_20_d ));
decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d ));

// ---------------------------------------------------------------------------
// Instruction recognition
// ---------------------------------------------------------------------------
assign inst_add_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w  = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w   = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl   = op_31_26_d[6'h13];
assign inst_b      = op_31_26_d[6'h14];
assign inst_bl     = op_31_26_d[6'h15];
assign inst_beq    = op_31_26_d[6'h16];
assign inst_bne    = op_31_26_d[6'h17];
assign inst_lu12i_w= op_31_26_d[6'h05] & ~ds_inst[25];

// ---------------------------------------------------------------------------
// ALU operation select (one bit per operation)
// ---------------------------------------------------------------------------
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

// ---------------------------------------------------------------------------
// Immediate generation
// ---------------------------------------------------------------------------
assign need_si20  =  inst_lu12i_w;
assign need_si26  =  inst_b | inst_bl;
assign src2_is_4  =  inst_jirl | inst_bl;  // link instructions compute pc + 4

// Everything that is neither a link instruction nor lu12i.w (i.e. the ui5 and
// si12 forms) takes the sign-extended 12-bit field.
assign imm = src2_is_4 ? 32'h4                      :
             need_si20 ? {i20[19:0], 12'b0}         :
                         {{20{i12[11]}}, i12[11:0]} ;

assign br_offs   = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                               {{14{i16[15]}}, i16[15:0], 2'b0} ;
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

// ---------------------------------------------------------------------------
// Control signals sent to later stages
// ---------------------------------------------------------------------------
assign load_op       = inst_ld_w;

// beq / bne / st.w read rd as their second source register instead of rk.
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;

assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w |
                       inst_srli_w |
                       inst_srai_w |
                       inst_addi_w |
                       inst_ld_w   |
                       inst_st_w   |
                       inst_lu12i_w|
                       inst_jirl   |
                       inst_bl     ;

assign dst_is_r1     = inst_bl;

// Whether the instruction writes a general-purpose register.
assign gr_we         = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we        = inst_st_w;

// bl always links into r1; every other writer uses the rd field.
assign dest          = dst_is_r1 ? 5'd01 : rd;

// ---------------------------------------------------------------------------
// Register file read
// ---------------------------------------------------------------------------
assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;

regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;

// ---------------------------------------------------------------------------
// Branch resolution
// ---------------------------------------------------------------------------
assign rj_eq_rd = (rj_value == rkd_value);

// NOTE(review): br_bus carries only {taken, target}; nothing in this module
// invalidates an instruction already fetched behind a taken branch. Confirm
// how the IF stage or the test program deals with that instruction.
assign br_taken = (   inst_beq  &&  rj_eq_rd
                   || inst_bne  && !rj_eq_rd
                   || inst_jirl
                   || inst_bl
                   || inst_b
                  ) && ds_valid;

assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);

endmodule
4. EXE_stage
`include "mycpu.h"

// Execute (EXE) stage of the five-stage LoongArch pipeline.
//
// Latches the decoded bundle from ID, selects the two ALU operands, runs the
// ALU, and drives the data-SRAM request (address = ALU result, write data =
// rkd operand). The stage always completes in one cycle (es_ready_go == 1).
module exe_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin
    input                          ms_allowin    ,
    output                         es_allowin    ,
    // from ds
    input                          ds_to_es_valid,
    input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to ms
    output                         es_to_ms_valid,
    output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus  ,
    // data sram interface
    output        data_sram_en   ,
    output [ 3:0] data_sram_we   ,
    output [31:0] data_sram_addr ,
    output [31:0] data_sram_wdata
);

reg         es_valid      ;
wire        es_ready_go   ;

// ---------------------------------------------------------------------------
// Latched ID->EXE bus and its fields
// ---------------------------------------------------------------------------
reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
wire        es_src1_is_pc ;
wire        es_src2_is_imm;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value  ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;

// A load takes its write-back value from memory rather than from the ALU.
wire        es_res_from_mem;
assign es_res_from_mem = es_load_op;

assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

// ---------------------------------------------------------------------------
// Handshake
// ---------------------------------------------------------------------------
assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid =  es_valid && es_ready_go;

always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end

    // Capture the ID bus only when a valid instruction is actually accepted.
    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// ---------------------------------------------------------------------------
// ALU
// ---------------------------------------------------------------------------
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm      : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

// ---------------------------------------------------------------------------
// Data SRAM request
// ---------------------------------------------------------------------------
assign data_sram_en    = 1'b1;
// Gate the write strobes with es_valid so a bubble can never store.
assign data_sram_we    = es_mem_we && es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
assign data_sram_wdata = es_rkd_value;

endmodule
5. MEM_stage(注:下方贴出的代码与第 4 节的 exe_stage 完全相同,疑为原文贴错;原文没有给出 mem_stage 模块的代码)
`include "mycpu.h"

// Execute (EXE) stage of the five-stage LoongArch pipeline.
//
// NOTE(review): in the article this listing is printed under the heading
// "5. MEM_stage", but the code defines exe_stage again; the memory-access
// stage module itself is not part of the article. Keeping two definitions of
// exe_stage in one project will not elaborate, so only one copy should be
// compiled.
//
// Latches the decoded bundle from ID, selects the two ALU operands, runs the
// ALU, and drives the data-SRAM request (address = ALU result, write data =
// rkd operand). The stage always completes in one cycle (es_ready_go == 1).
module exe_stage(
    input                          clk           ,
    input                          reset         ,
    // allowin
    input                          ms_allowin    ,
    output                         es_allowin    ,
    // from ds
    input                          ds_to_es_valid,
    input  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus  ,
    // to ms
    output                         es_to_ms_valid,
    output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus  ,
    // data sram interface
    output        data_sram_en   ,
    output [ 3:0] data_sram_we   ,
    output [31:0] data_sram_addr ,
    output [31:0] data_sram_wdata
);

reg         es_valid      ;
wire        es_ready_go   ;

// ---------------------------------------------------------------------------
// Latched ID->EXE bus and its fields
// ---------------------------------------------------------------------------
reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op     ;
wire        es_load_op    ;
wire        es_src1_is_pc ;
wire        es_src2_is_imm;
wire        es_gr_we      ;
wire        es_mem_we     ;
wire [ 4:0] es_dest       ;
wire [31:0] es_imm        ;
wire [31:0] es_rj_value   ;
wire [31:0] es_rkd_value  ;
wire [31:0] es_pc         ;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1   ;
wire [31:0] es_alu_src2   ;
wire [31:0] es_alu_result ;

// A load takes its write-back value from memory rather than from the ALU.
wire        es_res_from_mem;
assign es_res_from_mem = es_load_op;

assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

// ---------------------------------------------------------------------------
// Handshake
// ---------------------------------------------------------------------------
assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid =  es_valid && es_ready_go;

always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end

    // Capture the ID bus only when a valid instruction is actually accepted.
    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// ---------------------------------------------------------------------------
// ALU
// ---------------------------------------------------------------------------
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm      : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

// ---------------------------------------------------------------------------
// Data SRAM request
// ---------------------------------------------------------------------------
assign data_sram_en    = 1'b1;
// Gate the write strobes with es_valid so a bubble can never store.
assign data_sram_we    = es_mem_we && es_valid ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
assign data_sram_wdata = es_rkd_value;

endmodule
6. WB_stage
`include "mycpu.h"

// Write-back (WB) stage of the five-stage LoongArch pipeline.
//
// Latches the result bundle from MEM, drives the register-file write port
// through ws_to_rf_bus, and mirrors the write on the debug trace interface.
// The stage always accepts new input (ws_ready_go == 1).
module wb_stage(
    input                           clk           ,
    input                           reset         ,
    // allowin
    output                          ws_allowin    ,
    // from ms
    input                           ms_to_ws_valid,
    input  [`MS_TO_WS_BUS_WD -1:0]  ms_to_ws_bus  ,
    // to rf: for write back
    output [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus  ,
    // trace debug interface
    output [31:0] debug_wb_pc      ,
    output [ 3:0] debug_wb_rf_we   ,
    output [ 4:0] debug_wb_rf_wnum ,
    output [31:0] debug_wb_rf_wdata
);

reg         ws_valid;
wire        ws_ready_go;

// ---------------------------------------------------------------------------
// Latched MEM->WB bus and its fields
// ---------------------------------------------------------------------------
reg [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus_r;
wire        ws_gr_we;
wire [ 4:0] ws_dest;
wire [31:0] ws_final_result;
wire [31:0] ws_pc;
assign {ws_gr_we       ,  //69:69
        ws_dest        ,  //68:64
        ws_final_result,  //63:32
        ws_pc             //31:0
       } = ms_to_ws_bus_r;

// ---------------------------------------------------------------------------
// Register-file write port
// ---------------------------------------------------------------------------
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign ws_to_rf_bus = {rf_we   ,  //37:37
                       rf_waddr,  //36:32
                       rf_wdata   //31:0
                      };

// ---------------------------------------------------------------------------
// Handshake
// ---------------------------------------------------------------------------
assign ws_ready_go = 1'b1;
assign ws_allowin  = !ws_valid || ws_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ws_valid <= 1'b0;
    end
    else if (ws_allowin) begin
        ws_valid <= ms_to_ws_valid;
    end

    // Capture the MEM bus only when a valid instruction is actually accepted.
    if (ms_to_ws_valid && ws_allowin) begin
        ms_to_ws_bus_r <= ms_to_ws_bus;
    end
end

// Gate the write enable with ws_valid so a bubble can never write a register.
assign rf_we    = ws_gr_we && ws_valid;
assign rf_waddr = ws_dest;
assign rf_wdata = ws_final_result;

// ---------------------------------------------------------------------------
// Debug trace outputs
// ---------------------------------------------------------------------------
assign debug_wb_pc       = ws_pc;
assign debug_wb_rf_we    = {4{rf_we}};
assign debug_wb_rf_wnum  = ws_dest;
assign debug_wb_rf_wdata = ws_final_result;

endmodule

这篇关于 LoongArch 五级流水线实现的文章就介绍到这儿,希望对各位开发者有所帮助!



http://www.chinasem.cn/article/301827

相关文章

hdu1043(八数码问题,广搜 + hash(实现状态压缩) )

利用康拓展开将一个排列映射成一个自然数,然后就变成了普通的广搜题。 #include<iostream>#include<algorithm>#include<string>#include<stack>#include<queue>#include<map>#include<stdio.h>#include<stdlib.h>#include<ctype.h>#inclu

【C++】_list常用方法解析及模拟实现

相信自己的力量,只要对自己始终保持信心,尽自己最大努力去完成任何事,就算事情最终结果是失败了,努力了也不留遗憾。💓💓💓 目录   ✨说在前面 🍋知识点一:什么是list? •🌰1.list的定义 •🌰2.list的基本特性 •🌰3.常用接口介绍 🍋知识点二:list常用接口 •🌰1.默认成员函数 🔥构造函数(⭐) 🔥析构函数 •🌰2.list对象

【Prometheus】PromQL向量匹配实现不同标签的向量数据进行运算

✨✨ 欢迎大家来到景天科技苑✨✨ 🎈🎈 养成好习惯,先赞后看哦~🎈🎈 🏆 作者简介:景天科技苑 🏆《头衔》:大厂架构师,华为云开发者社区专家博主,阿里云开发者社区专家博主,CSDN全栈领域优质创作者,掘金优秀博主,51CTO博客专家等。 🏆《博客》:Python全栈,前后端开发,小程序开发,人工智能,js逆向,App逆向,网络系统安全,数据分析,Django,fastapi

让树莓派智能语音助手实现定时提醒功能

最初的时候是想直接在rasa 的chatbot上实现,因为rasa本身是带有remindschedule模块的。不过经过一番折腾后,忽然发现,chatbot上实现的定时,语音助手不一定会有响应。因为,我目前语音助手的代码设置了长时间无应答会结束对话,这样一来,chatbot定时提醒的触发就不会被语音助手获悉。那怎么让语音助手也具有定时提醒功能呢? 我最后选择的方法是用threading.Time

Android实现任意版本设置默认的锁屏壁纸和桌面壁纸(两张壁纸可不一致)

客户有些需求需要设置默认壁纸和锁屏壁纸  在默认情况下 这两个壁纸是相同的  如果需要默认的锁屏壁纸和桌面壁纸不一样 需要额外修改 Android13实现 替换默认桌面壁纸: 将图片文件替换frameworks/base/core/res/res/drawable-nodpi/default_wallpaper.*  (注意不能是bmp格式) 替换默认锁屏壁纸: 将图片资源放入vendo

C#实战|大乐透选号器[6]:实现实时显示已选择的红蓝球数量

哈喽,你好啊,我是雷工。 关于大乐透选号器在前面已经记录了5篇笔记,这是第6篇; 接下来实现实时显示当前选中红球数量,蓝球数量; 以下为练习笔记。 01 效果演示 当选择和取消选择红球或蓝球时,在对应的位置显示实时已选择的红球、蓝球的数量; 02 标签名称 分别设置Label标签名称为:lblRedCount、lblBlueCount

Kubernetes PodSecurityPolicy:PSP能实现的5种主要安全策略

Kubernetes PodSecurityPolicy:PSP能实现的5种主要安全策略 1. 特权模式限制2. 宿主机资源隔离3. 用户和组管理4. 权限提升控制5. SELinux配置 💖The Begin💖点点关注,收藏不迷路💖 Kubernetes的PodSecurityPolicy(PSP)是一个关键的安全特性,它在Pod创建之前实施安全策略,确保P

工厂ERP管理系统实现源码(JAVA)

工厂进销存管理系统是一个集采购管理、仓库管理、生产管理和销售管理于一体的综合解决方案。该系统旨在帮助企业优化流程、提高效率、降低成本,并实时掌握各环节的运营状况。 在采购管理方面,系统能够处理采购订单、供应商管理和采购入库等流程,确保采购过程的透明和高效。仓库管理方面,实现库存的精准管理,包括入库、出库、盘点等操作,确保库存数据的准确性和实时性。 生产管理模块则涵盖了生产计划制定、物料需求计划、

C++——stack、queue的实现及deque的介绍

目录 1.stack与queue的实现 1.1stack的实现  1.2 queue的实现 2.重温vector、list、stack、queue的介绍 2.1 STL标准库中stack和queue的底层结构  3.deque的简单介绍 3.1为什么选择deque作为stack和queue的底层默认容器  3.2 STL中对stack与queue的模拟实现 ①stack模拟实现

基于51单片机的自动转向修复系统的设计与实现

文章目录 前言资料获取设计介绍功能介绍设计清单具体实现截图参考文献设计获取 前言 💗博主介绍:✌全网粉丝10W+,CSDN特邀作者、博客专家、CSDN新星计划导师,一名热衷于单片机技术探索与分享的博主、专注于 精通51/STM32/MSP430/AVR等单片机设计 主要对象是咱们电子相关专业的大学生,希望您们都共创辉煌!✌💗 👇🏻 精彩专栏 推荐订阅👇🏻 单片机