cpu: more instruction pipelining
This is needed to gain timing margin (WNS, worst negative slack) for fetching instructions from SRAM.
This commit is contained in:
174
cpu/cpu.vhdl
174
cpu/cpu.vhdl
@@ -49,7 +49,7 @@ architecture behavior of cpu is
|
||||
|
||||
signal load_reg_next, load_reg: std_logic_vector(15 downto 0);
|
||||
signal load_addr_next, load_addr: std_logic_vector(15 downto 0);
|
||||
signal hold_inst_next, hold_inst: std_logic_vector(15 downto 0);
|
||||
signal inst_next, inst: std_logic_vector(15 downto 0);
|
||||
|
||||
type regbank is array(0 to 15) of std_logic_vector(15 downto 0);
|
||||
signal reg_d: regbank;
|
||||
@@ -62,7 +62,6 @@ begin
|
||||
|
||||
load_reg_r: reg port map(clk => clk, rst => rst, d => load_reg_next, q => load_reg);
|
||||
load_addr_r: reg port map(clk => clk, rst => rst, d => load_addr_next, q => load_addr);
|
||||
hold_inst_r: reg port map(clk => clk, rst => rst, d => hold_inst_next, q => hold_inst);
|
||||
|
||||
allregs:
|
||||
for i in 0 to 15 generate
|
||||
@@ -73,15 +72,16 @@ begin
|
||||
begin
|
||||
if rst = '1' then
|
||||
cpu_state <= BRANCH; -- wait a cycle at first
|
||||
inst <= x"0000";
|
||||
elsif rising_edge(clk) then
|
||||
cpu_state <= cpu_state_next;
|
||||
inst <= inst_next;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
code_addr <= reg_q(14);
|
||||
|
||||
process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, hold_inst) is
|
||||
variable inst: std_logic_vector(15 downto 0);
|
||||
process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, inst) is
|
||||
variable regn_0: natural;
|
||||
variable regn_1: natural;
|
||||
variable regn_2: natural;
|
||||
@@ -110,9 +110,9 @@ begin
|
||||
case cpu_state is
|
||||
when RUN =>
|
||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||
inst := code_data;
|
||||
inst_next <= code_data;
|
||||
when LOAD =>
|
||||
inst := x"0000"; -- NOP
|
||||
inst_next <= inst;
|
||||
mem_addr <= load_addr; -- maintain this until we're done reading
|
||||
if load_reg(3 downto 0) = x"e" then
|
||||
cpu_state_next <= BRANCH;
|
||||
@@ -127,93 +127,99 @@ begin
|
||||
reg_d(regn_0) <= mem_in;
|
||||
end if;
|
||||
when BRANCH =>
|
||||
inst := x"0000"; -- NOP
|
||||
inst_next <= x"0000"; -- NOP
|
||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||
when WAIT_MEM =>
|
||||
cpu_state_next <= RUN;
|
||||
inst := hold_inst;
|
||||
inst_next <= inst;
|
||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||
end case;
|
||||
|
||||
hold_inst_next <= inst;
|
||||
|
||||
regn_0 := to_integer(unsigned(inst(11 downto 8)));
|
||||
regn_1 := to_integer(unsigned(inst(7 downto 4)));
|
||||
regn_2 := to_integer(unsigned(inst(3 downto 0)));
|
||||
|
||||
case inst(15 downto 12) is
|
||||
when "0000" => -- NOP
|
||||
when "0001" => -- LOAD rn, [rm, imm] (imm is signed 4 bits)
|
||||
if mem_busy = '1' then
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
cpu_state_next <= WAIT_MEM;
|
||||
else
|
||||
mem_read <= '1';
|
||||
cpu_state_next <= LOAD;
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
|
||||
load_addr_next <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
load_reg_next(3 downto 0) <= inst(11 downto 8);
|
||||
end if;
|
||||
|
||||
when "0010" => -- STORE rn, [rm, imm]
|
||||
if mem_busy = '1' then
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
cpu_state_next <= WAIT_MEM;
|
||||
else
|
||||
mem_write <= '1';
|
||||
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
mem_out <= reg_q(regn_0);
|
||||
end if;
|
||||
|
||||
--- ALU stuff
|
||||
when "0011" => do_alu := '1'; -- ADD rd, rn, rm (rd := rn + rm)
|
||||
when "0100" => do_alu := '1'; -- SUB rd, rn, rm (rd := rn - rm)
|
||||
when "0101" => do_alu := '1'; -- OR rd, rn, rm (rd := rn or rm)
|
||||
when "0110" => do_alu := '1'; -- AND rd, rn, rm (rd := rn and rm)
|
||||
when "0111" => do_alu := '1'; -- NOT rd, rn (rd := not rn)
|
||||
when "1000" => do_alu := '1'; -- XOR rd, rn, rm (rd := rn xor rm)
|
||||
when "1001" => -- SETH rd, imm
|
||||
reg_d(regn_0)(15 downto 8) <= inst(7 downto 0);
|
||||
when "1010" => -- SHR rd, rn, imm (rd := rn >> imm)
|
||||
alu_sel <= inst(15 downto 12);
|
||||
alu_a <= reg_q(regn_1);
|
||||
alu_b <= x"000" & inst(3 downto 0);
|
||||
reg_d(regn_0) <= alu_q;
|
||||
when "1011" => do_alu := '1'; -- MUL rd, rn, rm (rd := rn * rm)
|
||||
|
||||
when "1100" => -- CMP rn, rm (flag := 1 if equal)
|
||||
alu_sel <= "1100";
|
||||
alu_a <= reg_q(regn_0);
|
||||
alu_b <= reg_q(regn_1);
|
||||
reg_d(15)(0) <= alu_flag;
|
||||
|
||||
when "1101" => -- BEQ imm (jump to [pc, imm] if flag is set, imm is signed 12 bits)
|
||||
if reg_q(15)(0) = '1' then
|
||||
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
||||
cpu_state_next <= BRANCH;
|
||||
end if;
|
||||
when "1110" => -- SET rd, imm (rd := imm, imm is 8 bit)
|
||||
reg_d(regn_0) <= x"00" & inst(7 downto 0);
|
||||
when "1111" => -- BNEQ imm
|
||||
if reg_q(15)(0) = '0' then
|
||||
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
||||
cpu_state_next <= BRANCH;
|
||||
end if;
|
||||
|
||||
when others => -- do nothing
|
||||
end case;
|
||||
|
||||
if do_alu = '1' then
|
||||
-- 1:1 mapping
|
||||
alu_sel <= inst(15 downto 12);
|
||||
alu_a <= reg_q(regn_1);
|
||||
alu_b <= reg_q(regn_2);
|
||||
reg_d(regn_0) <= alu_q;
|
||||
reg_d(15)(0) <= alu_flag;
|
||||
if inst(11 downto 8) = x"e" then
|
||||
cpu_state_next <= BRANCH;
|
||||
if cpu_state = RUN then
|
||||
regn_0 := to_integer(unsigned(inst(11 downto 8)));
|
||||
regn_1 := to_integer(unsigned(inst(7 downto 4)));
|
||||
regn_2 := to_integer(unsigned(inst(3 downto 0)));
|
||||
|
||||
case inst(15 downto 12) is
|
||||
when "0000" => -- NOP
|
||||
when "0001" => -- LOAD rn, [rm, imm] (imm is signed 4 bits)
|
||||
if mem_busy = '1' then
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
inst_next <= inst;
|
||||
cpu_state_next <= WAIT_MEM;
|
||||
else
|
||||
mem_read <= '1';
|
||||
cpu_state_next <= LOAD;
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
-- inst_next <= inst;
|
||||
|
||||
load_addr_next <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
load_reg_next(3 downto 0) <= inst(11 downto 8);
|
||||
end if;
|
||||
|
||||
when "0010" => -- STORE rn, [rm, imm]
|
||||
if mem_busy = '1' then
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
inst_next <= inst;
|
||||
cpu_state_next <= WAIT_MEM;
|
||||
else
|
||||
mem_write <= '1';
|
||||
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
mem_out <= reg_q(regn_0);
|
||||
end if;
|
||||
|
||||
--- ALU stuff
|
||||
when "0011" => do_alu := '1'; -- ADD rd, rn, rm (rd := rn + rm)
|
||||
when "0100" => do_alu := '1'; -- SUB rd, rn, rm (rd := rn - rm)
|
||||
when "0101" => do_alu := '1'; -- OR rd, rn, rm (rd := rn or rm)
|
||||
when "0110" => do_alu := '1'; -- AND rd, rn, rm (rd := rn and rm)
|
||||
when "0111" => do_alu := '1'; -- NOT rd, rn (rd := not rn)
|
||||
when "1000" => do_alu := '1'; -- XOR rd, rn, rm (rd := rn xor rm)
|
||||
when "1001" => -- SETH rd, imm
|
||||
reg_d(regn_0)(15 downto 8) <= inst(7 downto 0);
|
||||
when "1010" => -- SHR rd, rn, imm (rd := rn >> imm)
|
||||
alu_sel <= inst(15 downto 12);
|
||||
alu_a <= reg_q(regn_1);
|
||||
alu_b <= x"000" & inst(3 downto 0);
|
||||
reg_d(regn_0) <= alu_q;
|
||||
when "1011" => do_alu := '1'; -- MUL rd, rn, rm (rd := rn * rm)
|
||||
|
||||
when "1100" => -- CMP rn, rm (flag := 1 if equal)
|
||||
alu_sel <= "1100";
|
||||
alu_a <= reg_q(regn_0);
|
||||
alu_b <= reg_q(regn_1);
|
||||
reg_d(15)(0) <= alu_flag;
|
||||
|
||||
when "1101" => -- BEQ imm (jump to [pc, imm] if flag is set, imm is signed 12 bits)
|
||||
if reg_q(15)(0) = '1' then
|
||||
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
||||
cpu_state_next <= BRANCH;
|
||||
end if;
|
||||
when "1110" => -- SET rd, imm (rd := imm, imm is 8 bit)
|
||||
reg_d(regn_0) <= x"00" & inst(7 downto 0);
|
||||
when "1111" => -- BNEQ imm
|
||||
if reg_q(15)(0) = '0' then
|
||||
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
||||
cpu_state_next <= BRANCH;
|
||||
end if;
|
||||
|
||||
when others => -- do nothing
|
||||
end case;
|
||||
|
||||
if do_alu = '1' then
|
||||
-- 1:1 mapping
|
||||
alu_sel <= inst(15 downto 12);
|
||||
alu_a <= reg_q(regn_1);
|
||||
alu_b <= reg_q(regn_2);
|
||||
reg_d(regn_0) <= alu_q;
|
||||
reg_d(15)(0) <= alu_flag;
|
||||
if inst(11 downto 8) = x"e" then
|
||||
cpu_state_next <= BRANCH;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
Reference in New Issue
Block a user