cpu: more instruction pipelining
This is needed to gain enough timing headroom (WNS, worst negative slack) for fetching instructions from SRAM.
This commit is contained in:
parent
6825ce464f
commit
36bc1417b6
174
cpu/cpu.vhdl
174
cpu/cpu.vhdl
@ -49,7 +49,7 @@ architecture behavior of cpu is
|
|||||||
|
|
||||||
signal load_reg_next, load_reg: std_logic_vector(15 downto 0);
|
signal load_reg_next, load_reg: std_logic_vector(15 downto 0);
|
||||||
signal load_addr_next, load_addr: std_logic_vector(15 downto 0);
|
signal load_addr_next, load_addr: std_logic_vector(15 downto 0);
|
||||||
signal hold_inst_next, hold_inst: std_logic_vector(15 downto 0);
|
signal inst_next, inst: std_logic_vector(15 downto 0);
|
||||||
|
|
||||||
type regbank is array(0 to 15) of std_logic_vector(15 downto 0);
|
type regbank is array(0 to 15) of std_logic_vector(15 downto 0);
|
||||||
signal reg_d: regbank;
|
signal reg_d: regbank;
|
||||||
@ -62,7 +62,6 @@ begin
|
|||||||
|
|
||||||
load_reg_r: reg port map(clk => clk, rst => rst, d => load_reg_next, q => load_reg);
|
load_reg_r: reg port map(clk => clk, rst => rst, d => load_reg_next, q => load_reg);
|
||||||
load_addr_r: reg port map(clk => clk, rst => rst, d => load_addr_next, q => load_addr);
|
load_addr_r: reg port map(clk => clk, rst => rst, d => load_addr_next, q => load_addr);
|
||||||
hold_inst_r: reg port map(clk => clk, rst => rst, d => hold_inst_next, q => hold_inst);
|
|
||||||
|
|
||||||
allregs:
|
allregs:
|
||||||
for i in 0 to 15 generate
|
for i in 0 to 15 generate
|
||||||
@ -73,15 +72,16 @@ begin
|
|||||||
begin
|
begin
|
||||||
if rst = '1' then
|
if rst = '1' then
|
||||||
cpu_state <= BRANCH; -- wait a cycle at first
|
cpu_state <= BRANCH; -- wait a cycle at first
|
||||||
|
inst <= x"0000";
|
||||||
elsif rising_edge(clk) then
|
elsif rising_edge(clk) then
|
||||||
cpu_state <= cpu_state_next;
|
cpu_state <= cpu_state_next;
|
||||||
|
inst <= inst_next;
|
||||||
end if;
|
end if;
|
||||||
end process;
|
end process;
|
||||||
|
|
||||||
code_addr <= reg_q(14);
|
code_addr <= reg_q(14);
|
||||||
|
|
||||||
process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, hold_inst) is
|
process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, inst) is
|
||||||
variable inst: std_logic_vector(15 downto 0);
|
|
||||||
variable regn_0: natural;
|
variable regn_0: natural;
|
||||||
variable regn_1: natural;
|
variable regn_1: natural;
|
||||||
variable regn_2: natural;
|
variable regn_2: natural;
|
||||||
@ -110,9 +110,9 @@ begin
|
|||||||
case cpu_state is
|
case cpu_state is
|
||||||
when RUN =>
|
when RUN =>
|
||||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||||
inst := code_data;
|
inst_next <= code_data;
|
||||||
when LOAD =>
|
when LOAD =>
|
||||||
inst := x"0000"; -- NOP
|
inst_next <= inst;
|
||||||
mem_addr <= load_addr; -- maintain this until we're done reading
|
mem_addr <= load_addr; -- maintain this until we're done reading
|
||||||
if load_reg(3 downto 0) = x"e" then
|
if load_reg(3 downto 0) = x"e" then
|
||||||
cpu_state_next <= BRANCH;
|
cpu_state_next <= BRANCH;
|
||||||
@ -127,93 +127,99 @@ begin
|
|||||||
reg_d(regn_0) <= mem_in;
|
reg_d(regn_0) <= mem_in;
|
||||||
end if;
|
end if;
|
||||||
when BRANCH =>
|
when BRANCH =>
|
||||||
inst := x"0000"; -- NOP
|
inst_next <= x"0000"; -- NOP
|
||||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||||
when WAIT_MEM =>
|
when WAIT_MEM =>
|
||||||
cpu_state_next <= RUN;
|
inst_next <= inst;
|
||||||
inst := hold_inst;
|
|
||||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||||
end case;
|
|
||||||
|
|
||||||
hold_inst_next <= inst;
|
|
||||||
|
|
||||||
regn_0 := to_integer(unsigned(inst(11 downto 8)));
|
|
||||||
regn_1 := to_integer(unsigned(inst(7 downto 4)));
|
|
||||||
regn_2 := to_integer(unsigned(inst(3 downto 0)));
|
|
||||||
|
|
||||||
case inst(15 downto 12) is
|
|
||||||
when "0000" => -- NOP
|
|
||||||
when "0001" => -- LOAD rn, [rm, imm] (imm is signed 4 bits)
|
|
||||||
if mem_busy = '1' then
|
if mem_busy = '1' then
|
||||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||||
cpu_state_next <= WAIT_MEM;
|
cpu_state_next <= WAIT_MEM;
|
||||||
else
|
|
||||||
mem_read <= '1';
|
|
||||||
cpu_state_next <= LOAD;
|
|
||||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
|
||||||
|
|
||||||
load_addr_next <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
|
||||||
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
|
||||||
load_reg_next(3 downto 0) <= inst(11 downto 8);
|
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
when "0010" => -- STORE rn, [rm, imm]
|
|
||||||
if mem_busy = '1' then
|
|
||||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
|
||||||
cpu_state_next <= WAIT_MEM;
|
|
||||||
else
|
|
||||||
mem_write <= '1';
|
|
||||||
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
|
||||||
mem_out <= reg_q(regn_0);
|
|
||||||
end if;
|
|
||||||
|
|
||||||
--- ALU stuff
|
|
||||||
when "0011" => do_alu := '1'; -- ADD rd, rn, rm (rd := rn + rm)
|
|
||||||
when "0100" => do_alu := '1'; -- SUB rd, rn, rm (rd := rn - rm)
|
|
||||||
when "0101" => do_alu := '1'; -- OR rd, rn, rm (rd := rn or rm)
|
|
||||||
when "0110" => do_alu := '1'; -- AND rd, rn, rm (rd := rn and rm)
|
|
||||||
when "0111" => do_alu := '1'; -- NOT rd, rn (rd := not rn)
|
|
||||||
when "1000" => do_alu := '1'; -- XOR rd, rn, rm (rd := rn xor rm)
|
|
||||||
when "1001" => -- SETH rd, imm
|
|
||||||
reg_d(regn_0)(15 downto 8) <= inst(7 downto 0);
|
|
||||||
when "1010" => -- SHR rd, rn, imm (rd := rn >> imm)
|
|
||||||
alu_sel <= inst(15 downto 12);
|
|
||||||
alu_a <= reg_q(regn_1);
|
|
||||||
alu_b <= x"000" & inst(3 downto 0);
|
|
||||||
reg_d(regn_0) <= alu_q;
|
|
||||||
when "1011" => do_alu := '1'; -- MUL rd, rn, rm (rd := rn * rm)
|
|
||||||
|
|
||||||
when "1100" => -- CMP rn, rm (flag := 1 if equal)
|
|
||||||
alu_sel <= "1100";
|
|
||||||
alu_a <= reg_q(regn_0);
|
|
||||||
alu_b <= reg_q(regn_1);
|
|
||||||
reg_d(15)(0) <= alu_flag;
|
|
||||||
|
|
||||||
when "1101" => -- BEQ imm (jump to [pc, imm] if flag is set, imm is signed 12 bits)
|
|
||||||
if reg_q(15)(0) = '1' then
|
|
||||||
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
|
||||||
cpu_state_next <= BRANCH;
|
|
||||||
end if;
|
|
||||||
when "1110" => -- SET rd, imm (rd := imm, imm is 8 bit)
|
|
||||||
reg_d(regn_0) <= x"00" & inst(7 downto 0);
|
|
||||||
when "1111" => -- BNEQ imm
|
|
||||||
if reg_q(15)(0) = '0' then
|
|
||||||
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
|
||||||
cpu_state_next <= BRANCH;
|
|
||||||
end if;
|
|
||||||
|
|
||||||
when others => -- do nothing
|
|
||||||
end case;
|
end case;
|
||||||
|
|
||||||
if do_alu = '1' then
|
if cpu_state = RUN then
|
||||||
-- 1:1 mapping
|
regn_0 := to_integer(unsigned(inst(11 downto 8)));
|
||||||
alu_sel <= inst(15 downto 12);
|
regn_1 := to_integer(unsigned(inst(7 downto 4)));
|
||||||
alu_a <= reg_q(regn_1);
|
regn_2 := to_integer(unsigned(inst(3 downto 0)));
|
||||||
alu_b <= reg_q(regn_2);
|
|
||||||
reg_d(regn_0) <= alu_q;
|
case inst(15 downto 12) is
|
||||||
reg_d(15)(0) <= alu_flag;
|
when "0000" => -- NOP
|
||||||
if inst(11 downto 8) = x"e" then
|
when "0001" => -- LOAD rn, [rm, imm] (imm is signed 4 bits)
|
||||||
cpu_state_next <= BRANCH;
|
if mem_busy = '1' then
|
||||||
|
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||||
|
inst_next <= inst;
|
||||||
|
cpu_state_next <= WAIT_MEM;
|
||||||
|
else
|
||||||
|
mem_read <= '1';
|
||||||
|
cpu_state_next <= LOAD;
|
||||||
|
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||||
|
-- inst_next <= inst;
|
||||||
|
|
||||||
|
load_addr_next <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||||
|
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||||
|
load_reg_next(3 downto 0) <= inst(11 downto 8);
|
||||||
|
end if;
|
||||||
|
|
||||||
|
when "0010" => -- STORE rn, [rm, imm]
|
||||||
|
if mem_busy = '1' then
|
||||||
|
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||||
|
inst_next <= inst;
|
||||||
|
cpu_state_next <= WAIT_MEM;
|
||||||
|
else
|
||||||
|
mem_write <= '1';
|
||||||
|
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||||
|
mem_out <= reg_q(regn_0);
|
||||||
|
end if;
|
||||||
|
|
||||||
|
--- ALU stuff
|
||||||
|
when "0011" => do_alu := '1'; -- ADD rd, rn, rm (rd := rn + rm)
|
||||||
|
when "0100" => do_alu := '1'; -- SUB rd, rn, rm (rd := rn - rm)
|
||||||
|
when "0101" => do_alu := '1'; -- OR rd, rn, rm (rd := rn or rm)
|
||||||
|
when "0110" => do_alu := '1'; -- AND rd, rn, rm (rd := rn and rm)
|
||||||
|
when "0111" => do_alu := '1'; -- NOT rd, rn (rd := not rn)
|
||||||
|
when "1000" => do_alu := '1'; -- XOR rd, rn, rm (rd := rn xor rm)
|
||||||
|
when "1001" => -- SETH rd, imm
|
||||||
|
reg_d(regn_0)(15 downto 8) <= inst(7 downto 0);
|
||||||
|
when "1010" => -- SHR rd, rn, imm (rd := rn >> imm)
|
||||||
|
alu_sel <= inst(15 downto 12);
|
||||||
|
alu_a <= reg_q(regn_1);
|
||||||
|
alu_b <= x"000" & inst(3 downto 0);
|
||||||
|
reg_d(regn_0) <= alu_q;
|
||||||
|
when "1011" => do_alu := '1'; -- MUL rd, rn, rm (rd := rn * rm)
|
||||||
|
|
||||||
|
when "1100" => -- CMP rn, rm (flag := 1 if equal)
|
||||||
|
alu_sel <= "1100";
|
||||||
|
alu_a <= reg_q(regn_0);
|
||||||
|
alu_b <= reg_q(regn_1);
|
||||||
|
reg_d(15)(0) <= alu_flag;
|
||||||
|
|
||||||
|
when "1101" => -- BEQ imm (jump to [pc, imm] if flag is set, imm is signed 12 bits)
|
||||||
|
if reg_q(15)(0) = '1' then
|
||||||
|
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
||||||
|
cpu_state_next <= BRANCH;
|
||||||
|
end if;
|
||||||
|
when "1110" => -- SET rd, imm (rd := imm, imm is 8 bit)
|
||||||
|
reg_d(regn_0) <= x"00" & inst(7 downto 0);
|
||||||
|
when "1111" => -- BNEQ imm
|
||||||
|
if reg_q(15)(0) = '0' then
|
||||||
|
reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0'));
|
||||||
|
cpu_state_next <= BRANCH;
|
||||||
|
end if;
|
||||||
|
|
||||||
|
when others => -- do nothing
|
||||||
|
end case;
|
||||||
|
|
||||||
|
if do_alu = '1' then
|
||||||
|
-- 1:1 mapping
|
||||||
|
alu_sel <= inst(15 downto 12);
|
||||||
|
alu_a <= reg_q(regn_1);
|
||||||
|
alu_b <= reg_q(regn_2);
|
||||||
|
reg_d(regn_0) <= alu_q;
|
||||||
|
reg_d(15)(0) <= alu_flag;
|
||||||
|
if inst(11 downto 8) = x"e" then
|
||||||
|
cpu_state_next <= BRANCH;
|
||||||
|
end if;
|
||||||
end if;
|
end if;
|
||||||
end if;
|
end if;
|
||||||
end process;
|
end process;
|
||||||
|
@ -92,7 +92,7 @@ def generate_ops(ops, labels, relocs):
|
|||||||
if isinstance(p, str): # label ref
|
if isinstance(p, str): # label ref
|
||||||
if len(params) == 1: # branch
|
if len(params) == 1: # branch
|
||||||
yield 14 # pc
|
yield 14 # pc
|
||||||
yield labels[p] - pc - 2
|
yield labels[p] - pc - 4
|
||||||
else: # set, allow relocs here
|
else: # set, allow relocs here
|
||||||
relocs.append((pc, p))
|
relocs.append((pc, p))
|
||||||
yield 0xff
|
yield 0xff
|
||||||
|
15
tools/cc.py
15
tools/cc.py
@ -473,10 +473,10 @@ class ShlOp(BinOp):
|
|||||||
return [f'set {sc1}, 1',
|
return [f'set {sc1}, 1',
|
||||||
f'or {self.dest}, {self.left}, {self.left}',
|
f'or {self.dest}, {self.left}, {self.left}',
|
||||||
f'sub {sc0}, {self.right}, {sc1}',
|
f'sub {sc0}, {self.right}, {sc1}',
|
||||||
f'beq [pc, 6]',
|
f'beq [pc, 4]',
|
||||||
f'add {self.dest}, {self.dest}, {self.dest}',
|
f'add {self.dest}, {self.dest}, {self.dest}',
|
||||||
f'sub {sc0}, {sc0}, {sc1}',
|
f'sub {sc0}, {sc0}, {sc1}',
|
||||||
f'bneq [pc, -6]']
|
f'bneq [pc, -8]']
|
||||||
|
|
||||||
|
|
||||||
class LtOp(BinOp):
|
class LtOp(BinOp):
|
||||||
@ -485,7 +485,7 @@ class LtOp(BinOp):
|
|||||||
sc0 = scratches[0]
|
sc0 = scratches[0]
|
||||||
return [f'set {self.dest}, 0',
|
return [f'set {self.dest}, 0',
|
||||||
f'sub {sc0}, {self.left}, {self.right}',
|
f'sub {sc0}, {self.left}, {self.right}',
|
||||||
f'bneq [pc, 2]',
|
f'bneq [pc, 0]',
|
||||||
f'set {self.dest}, 1']
|
f'set {self.dest}, 1']
|
||||||
|
|
||||||
class GtOp(LtOp):
|
class GtOp(LtOp):
|
||||||
@ -531,7 +531,7 @@ class BoolNot(UnOp):
|
|||||||
def synth(self, scratches):
|
def synth(self, scratches):
|
||||||
return [f'set {self.dest}, 0',
|
return [f'set {self.dest}, 0',
|
||||||
f'cmp {self.dest}, {self.operand}',
|
f'cmp {self.dest}, {self.operand}',
|
||||||
f'bneq [pc, 2]',
|
f'bneq [pc, 0]',
|
||||||
f'set {self.dest}, 1']
|
f'set {self.dest}, 1']
|
||||||
|
|
||||||
class NeqOp(BinOp):
|
class NeqOp(BinOp):
|
||||||
@ -555,7 +555,7 @@ class FnCall(AsmOp):
|
|||||||
sc0 = scratches[0]
|
sc0 = scratches[0]
|
||||||
fn = self.dest_fn
|
fn = self.dest_fn
|
||||||
|
|
||||||
return out + [f'set {sc0}, 2',
|
return out + [f'set {sc0}, 0',
|
||||||
f'add lr, pc, {sc0}',
|
f'add lr, pc, {sc0}',
|
||||||
f'or pc, {fn}, {fn}']
|
f'or pc, {fn}, {fn}']
|
||||||
|
|
||||||
@ -1375,10 +1375,11 @@ preamble = [f'_start:',
|
|||||||
f'set sp, 0',
|
f'set sp, 0',
|
||||||
f'seth sp, {0x11}', # 256 bytes of stack ought to be enough
|
f'seth sp, {0x11}', # 256 bytes of stack ought to be enough
|
||||||
f'set r2, main',
|
f'set r2, main',
|
||||||
f'set r3, 2',
|
f'set r3, 0',
|
||||||
f'add lr, pc, r3',
|
f'add lr, pc, r3',
|
||||||
f'or pc, r2, r2',
|
f'or pc, r2, r2',
|
||||||
f'or pc, pc, pc // loop forever',
|
f'cmp r0, r0',
|
||||||
|
f'beq [pc, -4] // loop forever',
|
||||||
]
|
]
|
||||||
|
|
||||||
def filter_dupes(ops):
|
def filter_dupes(ops):
|
||||||
|
Loading…
Reference in New Issue
Block a user