I have designed an 8x8 two's-complement multiplier in VHDL, but it is not fast enough for my PSD estimator, so I think I need to convert it into a pipelined design. My multiplier is shown below. Can anyone tell me how to pipeline it so that it runs faster?
Regards
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
use IEEE.std_logic_unsigned.all;
use ieee.numeric_std.all;
use ieee.std_logic_textio.all;
use IEEE.STD_LOGIC_1164.ALL;
-- Sequential multiplier: operands are captured when iDV is high and the
-- result is flagged by oDV a fixed number of clock cycles later.
entity mult_secv is
  generic (
    Na    : integer := 8;  -- width of operand ia, in bits
    Nb    : integer := 8;  -- width of operand ib, in bits
    Nbcnt : integer := 4   -- width of the internal cycle counter, in bits
  );
  port (
    iCLK  : in  std_logic;                              -- clock, rising-edge active
    iRST  : in  std_logic;                              -- asynchronous reset, active high
    iDV   : in  std_logic;                              -- input valid: load ia/ib and start
    ia    : in  std_logic_vector(Na-1 downto 0);        -- first operand
    ib    : in  std_logic_vector(Nb-1 downto 0);        -- second operand
    oDV   : out std_logic;                              -- output valid strobe for oDATA
    -- Result bus is Na+Nb-1 bits wide (one bit narrower than a full Na+Nb-bit
    -- product), so the single case (-2**(Na-1)) * (-2**(Nb-1)) cannot be
    -- represented as a two's-complement value on this port.
    oDATA : out std_logic_vector(Na+Nb-2 downto 0)
  );
end mult_secv;
-- Sequential shift-and-add multiplier for two's-complement operands.
--
-- With b = ib, the product is computed as
--     a*b = -b(Nb-1) * a * 2**(Nb-1)  +  sum(i = 0 .. Nb-2) b(i) * a * 2**i
-- The sign-bit term is loaded into the accumulator on the iDV clock edge; the
-- remaining Nb-1 partial products are added on the following Nb-1 clock edges,
-- one multiplier bit per clock. oDV is high for one clock once the last
-- partial product has been accumulated. All arithmetic is modulo
-- 2**(Na+Nb-1), the width of oDATA.
architecture produs of mult_secv is
  -- sa: sign-extended multiplicand, shifted left once per clock (a * 2**i).
  -- srez: accumulator holding the running (and finally the complete) product.
  signal sa, srez : std_logic_vector(Na+Nb-2 downto 0);
  -- sb: remaining multiplier bits, shifted right once per clock.
  signal sb       : std_logic_vector(Nb-1 downto 0);
  -- scnt: cycle counter; 0 means idle, 1 .. Nb-1 means a multiplication is running.
  signal scnt     : std_logic_vector(Nbcnt-1 downto 0);

  -- Terminal count, derived from Nb instead of being hard-coded to "0111" so
  -- that the architecture follows the generics (still "0111" for Nb=8, Nbcnt=4).
  constant scntmax  : std_logic_vector(Nbcnt-1 downto 0) :=
    std_logic_vector(to_unsigned(Nb-1, Nbcnt));
  -- Idle value of the cycle counter.
  constant scntidle : std_logic_vector(Nbcnt-1 downto 0) := (others => '0');
begin

  -- Guard the generic combinations the counter scheme can actually handle.
  assert (Nb >= 2) and (Nb-1 < 2**Nbcnt)
    report "mult_secv: need Nb >= 2 and Nbcnt wide enough to count to Nb-1"
    severity failure;

  -- Multiplicand register: load sign-extended ia, then shift left every clock.
  process(iCLK, iRST)
  begin
    if iRST = '1' then
      sa <= (others => '0');
    elsif rising_edge(iCLK) then
      if iDV = '1' then
        sa <= (Na+Nb-2 downto Na => ia(Na-1)) & ia;
      else
        sa <= sa(Na+Nb-3 downto 0) & '0';
      end if;
    end if;
  end process;

  -- Multiplier register: load ib, then shift right every clock so that the
  -- bit to be processed is always sb(0).
  process(iCLK, iRST)
  begin
    if iRST = '1' then
      sb <= (others => '0');
    elsif rising_edge(iCLK) then
      if iDV = '1' then
        -- The sign bit of ib is already accounted for in the initial value of
        -- srez, so it is masked here. Otherwise it would reach sb(0) one clock
        -- after oDV and add sa to the finished result, corrupting oDATA.
        sb <= '0' & ib(Nb-2 downto 0);
      else
        sb <= '0' & sb(Nb-1 downto 1);
      end if;
    end if;
  end process;

  -- Accumulator.
  process(iCLK, iRST)
  begin
    if iRST = '1' then
      srez <= (others => '0');
    elsif rising_edge(iCLK) then
      if iDV = '1' then
        -- Initial value is the sign-bit term: -(ia * 2**(Nb-1)) when ib is
        -- negative, otherwise zero. "not x + '1'" is two's-complement negation.
        if ib(Nb-1) = '1' then
          srez <= not (ia & (Nb-2 downto 0 => '0')) + '1';
        else
          srez <= (others => '0');
        end if;
      elsif sb(0) = '1' then
        -- Add the current partial product a * 2**i.
        srez <= srez + sa;
      end if;
      -- Otherwise srez simply keeps its value.
    end if;
  end process;

  -- Cycle counter: starts at 1 on iDV, counts up to scntmax, returns to idle.
  process(iCLK, iRST)
  begin
    if iRST = '1' then
      scnt <= (others => '0');
    elsif rising_edge(iCLK) then
      if iDV = '1' then
        scnt <= (Nbcnt-1 downto 1 => '0') & '1';
      elsif scnt = scntmax then
        scnt <= (others => '0');
      elsif scnt = scntidle then
        -- Stay idle until the next iDV. Without this hold the counter would
        -- free-run and raise oDV periodically with no multiplication started.
        null;
      else
        scnt <= scnt + '1';
      end if;
    end if;
  end process;

  oDATA <= srez;

  -- Output-valid strobe: registered, high for the one clock in which srez
  -- holds the complete product.
  process(iCLK, iRST)
  begin
    if iRST = '1' then
      oDV <= '0';
    elsif rising_edge(iCLK) then
      if scnt = scntmax then
        oDV <= '1';
      else
        oDV <= '0';
      end if;
    end if;
  end process;

end;
It should be something like this (not tested, but it shows the idea). A result still takes 8 clock cycles to compute (the latency is unchanged), but the multiplier is now pipelined, so it can accept a new pair of operands on every clock cycle.
Synthesis tools these days will generate this all for you. It's literally as simple as this: