cpu: more instruction pipelining
This is needed to leave enough timing slack (WNS, worst negative slack) for fetching instructions from SRAM.
This commit is contained in:
parent
6825ce464f
commit
36bc1417b6
28
cpu/cpu.vhdl
28
cpu/cpu.vhdl
@ -49,7 +49,7 @@ architecture behavior of cpu is
|
||||
|
||||
signal load_reg_next, load_reg: std_logic_vector(15 downto 0);
|
||||
signal load_addr_next, load_addr: std_logic_vector(15 downto 0);
|
||||
signal hold_inst_next, hold_inst: std_logic_vector(15 downto 0);
|
||||
signal inst_next, inst: std_logic_vector(15 downto 0);
|
||||
|
||||
type regbank is array(0 to 15) of std_logic_vector(15 downto 0);
|
||||
signal reg_d: regbank;
|
||||
@ -62,7 +62,6 @@ begin
|
||||
|
||||
load_reg_r: reg port map(clk => clk, rst => rst, d => load_reg_next, q => load_reg);
|
||||
load_addr_r: reg port map(clk => clk, rst => rst, d => load_addr_next, q => load_addr);
|
||||
hold_inst_r: reg port map(clk => clk, rst => rst, d => hold_inst_next, q => hold_inst);
|
||||
|
||||
allregs:
|
||||
for i in 0 to 15 generate
|
||||
@ -73,15 +72,16 @@ begin
|
||||
begin
|
||||
if rst = '1' then
|
||||
cpu_state <= BRANCH; -- wait a cycle at first
|
||||
inst <= x"0000";
|
||||
elsif rising_edge(clk) then
|
||||
cpu_state <= cpu_state_next;
|
||||
inst <= inst_next;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
code_addr <= reg_q(14);
|
||||
|
||||
process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, hold_inst) is
|
||||
variable inst: std_logic_vector(15 downto 0);
|
||||
process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, inst) is
|
||||
variable regn_0: natural;
|
||||
variable regn_1: natural;
|
||||
variable regn_2: natural;
|
||||
@ -110,9 +110,9 @@ begin
|
||||
case cpu_state is
|
||||
when RUN =>
|
||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||
inst := code_data;
|
||||
inst_next <= code_data;
|
||||
when LOAD =>
|
||||
inst := x"0000"; -- NOP
|
||||
inst_next <= inst;
|
||||
mem_addr <= load_addr; -- maintain this until we're done reading
|
||||
if load_reg(3 downto 0) = x"e" then
|
||||
cpu_state_next <= BRANCH;
|
||||
@ -127,16 +127,18 @@ begin
|
||||
reg_d(regn_0) <= mem_in;
|
||||
end if;
|
||||
when BRANCH =>
|
||||
inst := x"0000"; -- NOP
|
||||
inst_next <= x"0000"; -- NOP
|
||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||
when WAIT_MEM =>
|
||||
cpu_state_next <= RUN;
|
||||
inst := hold_inst;
|
||||
inst_next <= inst;
|
||||
reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2);
|
||||
if mem_busy = '1' then
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
cpu_state_next <= WAIT_MEM;
|
||||
end if;
|
||||
end case;
|
||||
|
||||
hold_inst_next <= inst;
|
||||
|
||||
if cpu_state = RUN then
|
||||
regn_0 := to_integer(unsigned(inst(11 downto 8)));
|
||||
regn_1 := to_integer(unsigned(inst(7 downto 4)));
|
||||
regn_2 := to_integer(unsigned(inst(3 downto 0)));
|
||||
@ -146,11 +148,13 @@ begin
|
||||
when "0001" => -- LOAD rn, [rm, imm] (imm is signed 4 bits)
|
||||
if mem_busy = '1' then
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
inst_next <= inst;
|
||||
cpu_state_next <= WAIT_MEM;
|
||||
else
|
||||
mem_read <= '1';
|
||||
cpu_state_next <= LOAD;
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
-- inst_next <= inst;
|
||||
|
||||
load_addr_next <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0'));
|
||||
@ -160,6 +164,7 @@ begin
|
||||
when "0010" => -- STORE rn, [rm, imm]
|
||||
if mem_busy = '1' then
|
||||
reg_d(14) <= reg_q(14); -- halt the prefetcher
|
||||
inst_next <= inst;
|
||||
cpu_state_next <= WAIT_MEM;
|
||||
else
|
||||
mem_write <= '1';
|
||||
@ -216,6 +221,7 @@ begin
|
||||
cpu_state_next <= BRANCH;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
end behavior;
|
||||
|
@ -92,7 +92,7 @@ def generate_ops(ops, labels, relocs):
|
||||
if isinstance(p, str): # label ref
|
||||
if len(params) == 1: # branch
|
||||
yield 14 # pc
|
||||
yield labels[p] - pc - 2
|
||||
yield labels[p] - pc - 4
|
||||
else: # set, allow relocs here
|
||||
relocs.append((pc, p))
|
||||
yield 0xff
|
||||
|
15
tools/cc.py
15
tools/cc.py
@ -473,10 +473,10 @@ class ShlOp(BinOp):
|
||||
return [f'set {sc1}, 1',
|
||||
f'or {self.dest}, {self.left}, {self.left}',
|
||||
f'sub {sc0}, {self.right}, {sc1}',
|
||||
f'beq [pc, 6]',
|
||||
f'beq [pc, 4]',
|
||||
f'add {self.dest}, {self.dest}, {self.dest}',
|
||||
f'sub {sc0}, {sc0}, {sc1}',
|
||||
f'bneq [pc, -6]']
|
||||
f'bneq [pc, -8]']
|
||||
|
||||
|
||||
class LtOp(BinOp):
|
||||
@ -485,7 +485,7 @@ class LtOp(BinOp):
|
||||
sc0 = scratches[0]
|
||||
return [f'set {self.dest}, 0',
|
||||
f'sub {sc0}, {self.left}, {self.right}',
|
||||
f'bneq [pc, 2]',
|
||||
f'bneq [pc, 0]',
|
||||
f'set {self.dest}, 1']
|
||||
|
||||
class GtOp(LtOp):
|
||||
@ -531,7 +531,7 @@ class BoolNot(UnOp):
|
||||
def synth(self, scratches):
|
||||
return [f'set {self.dest}, 0',
|
||||
f'cmp {self.dest}, {self.operand}',
|
||||
f'bneq [pc, 2]',
|
||||
f'bneq [pc, 0]',
|
||||
f'set {self.dest}, 1']
|
||||
|
||||
class NeqOp(BinOp):
|
||||
@ -555,7 +555,7 @@ class FnCall(AsmOp):
|
||||
sc0 = scratches[0]
|
||||
fn = self.dest_fn
|
||||
|
||||
return out + [f'set {sc0}, 2',
|
||||
return out + [f'set {sc0}, 0',
|
||||
f'add lr, pc, {sc0}',
|
||||
f'or pc, {fn}, {fn}']
|
||||
|
||||
@ -1375,10 +1375,11 @@ preamble = [f'_start:',
|
||||
f'set sp, 0',
|
||||
f'seth sp, {0x11}', # 256 bytes of stack ought to be enough
|
||||
f'set r2, main',
|
||||
f'set r3, 2',
|
||||
f'set r3, 0',
|
||||
f'add lr, pc, r3',
|
||||
f'or pc, r2, r2',
|
||||
f'or pc, pc, pc // loop forever',
|
||||
f'cmp r0, r0',
|
||||
f'beq [pc, -4] // loop forever',
|
||||
]
|
||||
|
||||
def filter_dupes(ops):
|
||||
|
Loading…
Reference in New Issue
Block a user