library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.curve_parameter_pkg.all;

-- Top-level testbench entity for the radix-4 modular multiplier.
-- It has no ports or generics: all stimulus is generated inside the architecture.
entity tb_modular_mult_RADIX4 is
end entity tb_modular_mult_RADIX4;

architecture structural of tb_modular_mult_RADIX4 is

  -- Testbench for modular_mult_RADIX4.
  -- Drives one (A, B) operand pair into the DUT together with the multiples
  -- 2*A and 3*A, computes the reference value (A * B) mod p with numeric_std,
  -- and compares it against mult_out once the DUT raises done.

  -- Upper bound on how long the checker waits for done after the start pulse.
  -- Chosen generously; NOTE(review): adjust if the DUT latency is longer.
  constant DONE_TIMEOUT : time := 100 us;

  -- 'a' is assigned below but is not read anywhere in this architecture.
  signal a, b_in, mult_out, correct_res : std_logic_vector(255 downto 0);
  -- a_in and its multiples are 258 bits wide (two bits wider than B).
  signal a_in, ax2, ax3 : std_logic_vector(257 downto 0);
  signal clock, reset, start, shift, done : std_logic;

  component modular_mult_RADIX4 is
    port( B : in std_logic_vector(255 downto 0);
          A, Ax2, Ax3 : in std_logic_vector(257 downto 0);   -- precomputed inputs supplied by the precomputation block
          clock, reset, start, shift : in std_logic;          -- reset active high
          mult_out : out std_logic_vector(255 downto 0);
          done : out std_logic);
  end component;

begin

  -- Not connected to the DUT. Appears to be the hexadecimal form of the
  -- commented-out alternative a_in vector below -- TODO confirm.
  a <= x"12036cee2b6ffe738cc740797779e89800700a4d4141d8ab75eb4dca135978a3";

  -- Alternative operand pair (disabled). a_in is 258 bits wide, b_in 256 bits.
--a_in <= "000001001000000011011011001110111000101011011011111111111001110011100011001100011101000000011110010111011101111001111010001001100000000000011100000000101001001101010000010100000111011000101010110111010111101011010011011100101000010011010110010111100010100011";
--b_in <= "0010000101101001001101101101001111001101011011100101001111111110110000001010010011100010001100011111110111010110110111000101110001101001001011001100011101100000100101010010010110100111101100101100100101010110001011010110000010001111001001011101010100011010";

  -- WORST CASE --

  a_in <= "000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111101100";
  b_in <= "0111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111101100";

  -- Multiples of A fed to the DUT. Only the low 256 bits of a_in are used;
  -- multiplying by a 2-bit constant yields a 256 + 2 = 258-bit product, which
  -- matches the width of ax2 / ax3.
  ax2 <= std_logic_vector(unsigned(a_in(255 downto 0)) * to_unsigned(2, 2));
  ax3 <= std_logic_vector(unsigned(a_in(255 downto 0)) * to_unsigned(3, 2));

  -- Control stimulus. Note that reset rises at 10 ns, the same instant as the
  -- first rising clock edge, and is released again at 15 ns.
  reset <= '0', '1' after 10 ns, '0' after 15 ns;
  start <= '0', '1' after 25 ns, '0' after 45 ns;
  shift <= '0', '1' after 45 ns;

  -- Free-running clock: 20 ns period, low for the first half of each period.
  clk : process
  begin
    clock <= '0', '1' after 10 ns;
    wait for 20 ns;
  end process;

  -- Reference result for comparison: (A * B) mod p, with p taken from
  -- curve_parameter_pkg.
  correct_res <= std_logic_vector((unsigned(a_in(255 downto 0)) * unsigned(b_in)) mod (unsigned(p)));

  -- Named association so a port reordering in the component cannot silently
  -- miswire the testbench.
  comp : modular_mult_RADIX4
    port map ( B        => b_in,
               A        => a_in,
               Ax2      => ax2,
               Ax3      => ax3,
               clock    => clock,
               reset    => reset,
               start    => start,
               shift    => shift,
               mult_out => mult_out,
               done     => done );

  -- Self-check: compares mult_out with correct_res once the DUT signals done.
  -- NOTE(review): assumes mult_out is valid while done is high and that any
  -- done level shown before the start pulse is irrelevant -- confirm against
  -- the DUT.
  check : process
  begin
    -- Ignore done until the start pulse has been issued and released.
    wait until start = '1';
    wait until start = '0';

    -- Sample on a falling clock edge, i.e. half a period away from the rising
    -- edges on which the clock toggles, so the outputs are not mid-update.
    wait until falling_edge(clock) and done = '1' for DONE_TIMEOUT;

    if done = '1' then
      if mult_out = correct_res then
        report "tb_modular_mult_RADIX4: PASS - mult_out equals (A*B) mod p"
          severity note;
      else
        report "tb_modular_mult_RADIX4: FAIL - mult_out differs from (A*B) mod p"
          severity error;
      end if;
    else
      report "tb_modular_mult_RADIX4: FAIL - done was not asserted within DONE_TIMEOUT"
        severity error;
    end if;

    wait;  -- single-shot check; stimulus and clock keep running
  end process;

end architecture;
