library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.curve_parameter_pkg.all;

 entity modular_mult_RADIX2 is
    -- Top-level interface of the 256-bit radix-2 modular multiplier.
    --
    -- A, B     : 256-bit operands; both are captured into internal registers
    --            whose load enable is driven by `start`.
    -- clock    : clock forwarded to every internal register.
    -- reset    : active-high reset of the operand and result registers.
    -- start    : loads the operands; it also drives the reset input of the
    --            internal accumulator register, so the accumulator is cleared
    --            each time a multiplication is started.
    -- shift    : forwarded to the shift input of the register holding B.
    -- mult_out : 256-bit result register output.
    -- done     : inverse of the OR of the low 256 bits of the B shift-register
    --            output, i.e. high once those bits are all zero.
    port (
        A        : in  std_logic_vector(255 downto 0);
        B        : in  std_logic_vector(255 downto 0);
        clock    : in  std_logic;
        reset    : in  std_logic;   -- active high
        start    : in  std_logic;
        shift    : in  std_logic;
        mult_out : out std_logic_vector(255 downto 0);
        done     : out std_logic
    );

 end entity modular_mult_RADIX2;

 architecture behavioural of modular_mult_RADIX2 is

    -- Structural datapath of the multiplier. On every clock the accumulator
    -- (reg_doub) is doubled, A is conditionally added depending on the bit
    -- currently exposed at the top of the B shift register, and the result is
    -- reduced by subtracting p or 2p (p comes from curve_parameter_pkg).
    -- NOTE(review): this matches an MSB-first interleaved modular
    -- multiplication; the exact bit ordering depends on shift_reg, which is
    -- defined outside this file -- confirm.

    -- 256-bit operand / result paths.
    signal reg1_out, or_in, reg2_in : std_logic_vector(255 downto 0);

    -- 258-bit datapath: two guard bits above the 256-bit operands so that
    -- 2*acc + A and the differences with p / 2p keep a valid sign bit.
    signal s1_a, s1_b, s2_a, s2_b, s3_a, s3_b : std_logic_vector(257 downto 0);
    signal s1_out, s2_out, s3_out             : std_logic_vector(257 downto 0);
    signal reg_doub_out, mux1_out, mux2_out   : std_logic_vector(257 downto 0);
    signal inv1_in, inv2_in                   : std_logic_vector(257 downto 0);
    signal mux2_in10                          : std_logic_vector(257 downto 0);

    -- Output of the B shift register (n+1 = 257 bits wide).
    signal reg_shift_out : std_logic_vector(256 downto 0);

    signal mux1_sel, or_out, sign_s2, sign_s3 : std_logic;
    signal mux2_sel : std_logic_vector(1 downto 0);

    component mux2to1 is
        generic( n : integer );
        port( in_0, In_1 : in std_logic_vector(n-1 downto 0);
              sel        : in std_logic;
              data_out   : out std_logic_vector(n-1 downto 0) );
    end component;

    component mux4to1 is
        generic( n : integer );
        port( in_00, in_01, in_10, in_11 : in std_logic_vector(n-1 downto 0);
              sel      : in std_logic_vector(1 downto 0);
              data_out : out std_logic_vector(n-1 downto 0) );
    end component;

    component reg is
        generic( n : integer );
        port ( data_in : in std_logic_vector(n-1 downto 0);
               clock, en, reset : in std_logic;                 -- reset active high
               data_out : out std_logic_vector(n-1 downto 0) );
    end component;

    component OR_256bit is
        port ( input_bits : in STD_LOGIC_VECTOR(255 downto 0);
               or_output  : out STD_LOGIC );
    end component;

    -- Declared but not instantiated in this architecture.
    component flip_flop is
        port ( data_in : in std_logic;
               clock, en, reset : in std_logic;                 -- reset active high
               data_out : out std_logic );
    end component;

    component CLA_258bit is  -- 258-bit inputs / output
        port( a_in, b_in : in std_logic_vector(257 downto 0);
              c_in : in std_logic;
              s    : out std_logic_vector(257 downto 0) );
    end component;

    component shift_reg is
        generic( n : integer );
        port ( data_in : in std_logic_vector(n-1 downto 0);
               clock, en, reset, shift : in std_logic;  -- reset active high, en enables the loading, shift enables the shift
               data_out : out std_logic_vector(n downto 0) );
    end component;

 begin

    ---------------------------------------------------------------------
    -- Operand registers
    ---------------------------------------------------------------------

    -- A is captured while start is high and zero-extended to 258 bits.
    reg1: reg
        generic map ( n => 256 )
        port map ( data_in  => A,
                   clock    => clock,
                   en       => start,
                   reset    => reset,
                   data_out => reg1_out );

    s1_a <= '0' & '0' & reg1_out;

    -- B is loaded while start is high and shifted while shift is high.
    shift_reg1: shift_reg
        generic map ( n => 256 )
        port map ( data_in  => B,
                   clock    => clock,
                   en       => start,
                   reset    => reset,
                   shift    => shift,
                   data_out => reg_shift_out );

    -- The extra top bit of the shift register output decides whether A is
    -- added in the current step.
    mux1_sel <= reg_shift_out(256);

    -- or_out reflects the low 256 bits of the shift register output; it
    -- gates the result register and generates done.
    or_in <= reg_shift_out(255 downto 0);
    or_gate: OR_256bit
        port map ( input_bits => or_in,
                   or_output  => or_out );

    ---------------------------------------------------------------------
    -- Accumulator: double, conditionally add A
    ---------------------------------------------------------------------

    -- Always enabled; its reset is tied to start, so the accumulator is
    -- cleared each time a multiplication is started.
    reg_doub: reg
        generic map ( n => 258 )
        port map ( data_in  => mux2_out,
                   clock    => clock,
                   en       => '1',
                   reset    => start,
                   data_out => reg_doub_out );

    s1_b <= reg_doub_out(256 downto 0) & '0';  -- doubling (left shift by one)

    -- s1_out = A + 2*acc
    s1: CLA_258bit
        port map ( a_in => s1_a,
                   b_in => s1_b,
                   c_in => '0',
                   s    => s1_out );

    -- in_0 = 2*acc, in_1 = 2*acc + A
    mux1: mux2to1
        generic map ( n => 258 )
        port map ( in_0     => s1_b,
                   in_1     => s1_out,
                   sel      => mux1_sel,
                   data_out => mux1_out );

    ---------------------------------------------------------------------
    -- Modular reduction: subtract p and 2p in parallel
    ---------------------------------------------------------------------

    -- Two's-complement subtraction: x - y = x + not(y) + 1, the +1 being
    -- supplied through the adders' carry-in.
    inv1_in <= '0' & '0' & p;
    s2_a    <= not( inv1_in );

    inv2_in <= '0' & p & '0';  -- 2p extended
    s3_b    <= not( inv2_in );

    s2_b <= mux1_out;
    s3_a <= mux1_out;

    -- s2_out = mux1_out - p
    s2: CLA_258bit
        port map ( a_in => s2_a,
                   b_in => s2_b,
                   c_in => '1',
                   s    => s2_out );

    -- s3_out = mux1_out - 2p
    s3: CLA_258bit
        port map ( a_in => s3_a,
                   b_in => s3_b,
                   c_in => '1',
                   s    => s3_out );

    sign_s2 <= s2_out(257);
    sign_s3 <= s3_out(257);

    -- mux2_sel = sign(x - p) & sign(x - 2p), with x = mux1_out:
    --   "00" : x >= 2p      -> x - 2p
    --   "01" : p <= x < 2p  -> x - p
    --   "10" : x < p and x >= 2p, which cannot occur
    --   "11" : x < p        -> x
    mux2_sel <= sign_s2 & sign_s3;

    -- The "10" input is never selected in normal operation. Tie it to a
    -- defined value so the mux input is not left undriven ('U' in
    -- simulation, undriven-net warnings in synthesis).
    mux2_in10 <= (others => '0');

    mux2: mux4to1
        generic map ( n => 258 )
        port map ( in_00    => s3_out,
                   in_01    => s2_out,
                   in_10    => mux2_in10,
                   in_11    => mux1_out,
                   sel      => mux2_sel,
                   data_out => mux2_out );

    ---------------------------------------------------------------------
    -- Result register and completion flag
    ---------------------------------------------------------------------

    -- The result register follows the low 256 bits of the reduced value
    -- while or_out is high and holds its content afterwards.
    reg2_in <= mux2_out(255 downto 0);
    reg2: reg
        generic map ( n => 256 )
        port map ( data_in  => reg2_in,
                   clock    => clock,
                   en       => or_out,
                   reset    => reset,
                   data_out => mult_out );

    done <= not( or_out );

 end architecture behavioural;
