diff --git a/cpu/cpu.vhdl b/cpu/cpu.vhdl index 4afd788..b177623 100644 --- a/cpu/cpu.vhdl +++ b/cpu/cpu.vhdl @@ -49,7 +49,7 @@ architecture behavior of cpu is signal load_reg_next, load_reg: std_logic_vector(15 downto 0); signal load_addr_next, load_addr: std_logic_vector(15 downto 0); - signal hold_inst_next, hold_inst: std_logic_vector(15 downto 0); + signal inst_next, inst: std_logic_vector(15 downto 0); type regbank is array(0 to 15) of std_logic_vector(15 downto 0); signal reg_d: regbank; @@ -62,7 +62,6 @@ begin load_reg_r: reg port map(clk => clk, rst => rst, d => load_reg_next, q => load_reg); load_addr_r: reg port map(clk => clk, rst => rst, d => load_addr_next, q => load_addr); - hold_inst_r: reg port map(clk => clk, rst => rst, d => hold_inst_next, q => hold_inst); allregs: for i in 0 to 15 generate @@ -73,15 +72,16 @@ begin begin if rst = '1' then cpu_state <= BRANCH; -- wait a cycle at first + inst <= x"0000"; elsif rising_edge(clk) then cpu_state <= cpu_state_next; + inst <= inst_next; end if; end process; code_addr <= reg_q(14); - process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, hold_inst) is - variable inst: std_logic_vector(15 downto 0); + process(code_data, reg_q, mem_in, mem_busy, alu_q, alu_flag, cpu_state, load_addr, load_reg, inst) is variable regn_0: natural; variable regn_1: natural; variable regn_2: natural; @@ -110,9 +110,9 @@ begin case cpu_state is when RUN => reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2); - inst := code_data; + inst_next <= code_data; when LOAD => - inst := x"0000"; -- NOP + inst_next <= inst; mem_addr <= load_addr; -- maintain this until we're done reading if load_reg(3 downto 0) = x"e" then cpu_state_next <= BRANCH; @@ -127,93 +127,99 @@ begin reg_d(regn_0) <= mem_in; end if; when BRANCH => - inst := x"0000"; -- NOP + inst_next <= x"0000"; -- NOP reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2); when WAIT_MEM => - cpu_state_next <= RUN; - inst := hold_inst; + inst_next <= inst; reg_d(14) <= std_logic_vector(unsigned(reg_q(14)) + 2); - end case; - - hold_inst_next <= inst; - - regn_0 := to_integer(unsigned(inst(11 downto 8))); - regn_1 := to_integer(unsigned(inst(7 downto 4))); - regn_2 := to_integer(unsigned(inst(3 downto 0))); - - case inst(15 downto 12) is - when "0000" => -- NOP - when "0001" => -- LOAD rn, [rm, imm] (imm is signed 4 bits) if mem_busy = '1' then reg_d(14) <= reg_q(14); -- halt the prefetcher cpu_state_next <= WAIT_MEM; - else - mem_read <= '1'; - cpu_state_next <= LOAD; - reg_d(14) <= reg_q(14); -- halt the prefetcher - - load_addr_next <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0')); - mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0')); - load_reg_next(3 downto 0) <= inst(11 downto 8); end if; - - when "0010" => -- STORE rn, [rm, imm] - if mem_busy = '1' then - reg_d(14) <= reg_q(14); -- halt the prefetcher - cpu_state_next <= WAIT_MEM; - else - mem_write <= '1'; - mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0')); - mem_out <= reg_q(regn_0); - end if; - - --- ALU stuff - when "0011" => do_alu := '1'; -- ADD rd, rn, rm (rd := rn + rm) - when "0100" => do_alu := '1'; -- SUB rd, rn, rm (rd := rn - rm) - when "0101" => do_alu := '1'; -- OR rd, rn, rm (rd := rn or rm) - when "0110" => do_alu := '1'; -- AND rd, rn, rm (rd := rn and rm) - when "0111" => do_alu := '1'; -- NOT rd, rn (rd := not rn) - when "1000" => do_alu := '1'; -- XOR rd, rn, rm (rd := rn xor rm) - when "1001" => -- SETH rd, imm - reg_d(regn_0)(15 downto 8) <= inst(7 downto 0); - when "1010" => -- SHR rd, rn, imm (rd := rn >> imm) - alu_sel <= inst(15 downto 12); - alu_a <= reg_q(regn_1); - alu_b <= x"000" & inst(3 downto 0); - reg_d(regn_0) <= alu_q; - when "1011" => do_alu := '1'; -- MUL rd, rn, rm (rd := rn * rm) - - when "1100" => -- CMP rn, rm (flag := 1 if equal) - alu_sel <= "1100"; - alu_a <= reg_q(regn_0); - alu_b <= reg_q(regn_1); - reg_d(15)(0) <= alu_flag; - - when "1101" => -- BEQ imm (jump to [pc, imm] if flag is set, imm is signed 12 bits) - if reg_q(15)(0) = '1' then - reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0')); - cpu_state_next <= BRANCH; - end if; - when "1110" => -- SET rd, imm (rd := imm, imm is 8 bit) - reg_d(regn_0) <= x"00" & inst(7 downto 0); - when "1111" => -- BNEQ imm - if reg_q(15)(0) = '0' then - reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0')); - cpu_state_next <= BRANCH; - end if; - - when others => -- do nothing end case; - if do_alu = '1' then - -- 1:1 mapping - alu_sel <= inst(15 downto 12); - alu_a <= reg_q(regn_1); - alu_b <= reg_q(regn_2); - reg_d(regn_0) <= alu_q; - reg_d(15)(0) <= alu_flag; - if inst(11 downto 8) = x"e" then - cpu_state_next <= BRANCH; + if cpu_state = RUN then + regn_0 := to_integer(unsigned(inst(11 downto 8))); + regn_1 := to_integer(unsigned(inst(7 downto 4))); + regn_2 := to_integer(unsigned(inst(3 downto 0))); + + case inst(15 downto 12) is + when "0000" => -- NOP + when "0001" => -- LOAD rn, [rm, imm] (imm is signed 4 bits) + if mem_busy = '1' then + reg_d(14) <= reg_q(14); -- halt the prefetcher + inst_next <= inst; + cpu_state_next <= WAIT_MEM; + else + mem_read <= '1'; + cpu_state_next <= LOAD; + reg_d(14) <= reg_q(14); -- halt the prefetcher +-- inst_next <= inst; + + load_addr_next <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0')); + mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0')); + load_reg_next(3 downto 0) <= inst(11 downto 8); + end if; + + when "0010" => -- STORE rn, [rm, imm] + if mem_busy = '1' then + reg_d(14) <= reg_q(14); -- halt the prefetcher + inst_next <= inst; + cpu_state_next <= WAIT_MEM; + else + mem_write <= '1'; + mem_addr <= std_logic_vector(signed(reg_q(regn_1)) + signed(inst(3 downto 0) & '0')); + mem_out <= reg_q(regn_0); + end if; + + --- ALU stuff + when "0011" => do_alu := '1'; -- ADD rd, rn, rm (rd := rn + rm) + when "0100" => do_alu := '1'; -- SUB rd, rn, rm (rd := rn - rm) + when "0101" => do_alu := '1'; -- OR rd, rn, rm (rd := rn or rm) + when "0110" => do_alu := '1'; -- AND rd, rn, rm (rd := rn and rm) + when "0111" => do_alu := '1'; -- NOT rd, rn (rd := not rn) + when "1000" => do_alu := '1'; -- XOR rd, rn, rm (rd := rn xor rm) + when "1001" => -- SETH rd, imm + reg_d(regn_0)(15 downto 8) <= inst(7 downto 0); + when "1010" => -- SHR rd, rn, imm (rd := rn >> imm) + alu_sel <= inst(15 downto 12); + alu_a <= reg_q(regn_1); + alu_b <= x"000" & inst(3 downto 0); + reg_d(regn_0) <= alu_q; + when "1011" => do_alu := '1'; -- MUL rd, rn, rm (rd := rn * rm) + + when "1100" => -- CMP rn, rm (flag := 1 if equal) + alu_sel <= "1100"; + alu_a <= reg_q(regn_0); + alu_b <= reg_q(regn_1); + reg_d(15)(0) <= alu_flag; + + when "1101" => -- BEQ imm (jump to [pc, imm] if flag is set, imm is signed 12 bits) + if reg_q(15)(0) = '1' then + reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0')); + cpu_state_next <= BRANCH; + end if; + when "1110" => -- SET rd, imm (rd := imm, imm is 8 bit) + reg_d(regn_0) <= x"00" & inst(7 downto 0); + when "1111" => -- BNEQ imm + if reg_q(15)(0) = '0' then + reg_d(14) <= std_logic_vector(signed(reg_q(14)) + signed(inst(11 downto 0) & '0')); + cpu_state_next <= BRANCH; + end if; + + when others => -- do nothing + end case; + + if do_alu = '1' then + -- 1:1 mapping + alu_sel <= inst(15 downto 12); + alu_a <= reg_q(regn_1); + alu_b <= reg_q(regn_2); + reg_d(regn_0) <= alu_q; + reg_d(15)(0) <= alu_flag; + if inst(11 downto 8) = x"e" then + cpu_state_next <= BRANCH; + end if; end if; end if; end process; diff --git a/tools/as.py b/tools/as.py index f95bb90..72abbf2 100644 --- a/tools/as.py +++ b/tools/as.py @@ -92,7 +92,7 @@ def generate_ops(ops, labels, relocs): if isinstance(p, str): # label ref if len(params) == 1: # branch yield 14 # pc - yield labels[p] - pc - 2 + yield labels[p] - pc - 4 else: # set, allow relocs here relocs.append((pc, p)) yield 0xff diff --git a/tools/cc.py b/tools/cc.py index 9f9ec4f..e058631 100644 --- a/tools/cc.py +++ b/tools/cc.py @@ -473,10 +473,10 @@ class ShlOp(BinOp): return [f'set {sc1}, 1', f'or {self.dest}, {self.left}, {self.left}', f'sub {sc0}, {self.right}, {sc1}', - f'beq [pc, 6]', + f'beq [pc, 4]', f'add {self.dest}, {self.dest}, {self.dest}', f'sub {sc0}, {sc0}, {sc1}', - f'bneq [pc, -6]'] + f'bneq [pc, -8]'] class LtOp(BinOp): @@ -485,7 +485,7 @@ class LtOp(BinOp): sc0 = scratches[0] return [f'set {self.dest}, 0', f'sub {sc0}, {self.left}, {self.right}', - f'bneq [pc, 2]', + f'bneq [pc, 0]', f'set {self.dest}, 1'] class GtOp(LtOp): @@ -531,7 +531,7 @@ class BoolNot(UnOp): def synth(self, scratches): return [f'set {self.dest}, 0', f'cmp {self.dest}, {self.operand}', - f'bneq [pc, 2]', + f'bneq [pc, 0]', f'set {self.dest}, 1'] class NeqOp(BinOp): @@ -555,7 +555,7 @@ class FnCall(AsmOp): sc0 = scratches[0] fn = self.dest_fn - return out + [f'set {sc0}, 2', + return out + [f'set {sc0}, 0', f'add lr, pc, {sc0}', f'or pc, {fn}, {fn}'] @@ -1375,10 +1375,11 @@ preamble = [f'_start:', f'set sp, 0', f'seth sp, {0x11}', # 256 bytes of stack ought to be enough f'set r2, main', - f'set r3, 2', + f'set r3, 0', f'add lr, pc, r3', f'or pc, r2, r2', - f'or pc, pc, pc // loop forever', + f'cmp r0, r0', + f'beq [pc, -4] // loop forever', ] def filter_dupes(ops):