//-----------------------------------------------------------------------------
// This confidential and proprietary software may be used only as authorized by
// a licensing agreement from PLDA. In the event of publication, a copyright
// notice must be reproduced on all authorized copies.
//
//-----------------------------------------------------------------------------
// Project : XpressRich5-AXI/AXI_BFM
//           (employed in PIPE Monitor Checker | MODIFIED by nrigotti)
// Author  : caires (based on the module AXI RAM written by rtuszewski)
//-----------------------------------------------------------------------------
// Description: The AXI Backdoor Access implements an AXI interface that gives
// read access to N AXI RAMs, translating AXI <-> RAM ports. Good luck :)
//
//              This module is composed of:
//                (1) Local Declarations
//                (2) RAM read-enable decode and read-data multiplexer
//                (3) read_mgt    : AXI4 Slave Read  Request Management
//
// Dependency : -
//-----------------------------------------------------------------------------


`default_nettype none
`timescale 1 ns / 1 ps

`include "pipemc_constant_h.v"

//-----------------------------------------------------------------------------
// axi_ram_backdoor
//
// Read-only AXI4 slave that forwards read requests to a bank of RAMs sharing
// one read-address bus (shared_ram_rdaddr) and having one read-enable bit and
// one AXI_DATA_WIDTH-wide read-data lane each.
//
// Address decoding (taken from araddr at the AR handshake):
//   araddr[MEM_ADDR_WIDTH_MAX-1:0]                  -> shared_ram_rdaddr
//   araddr[MEM_ADDR_WIDTH_MAX +: ADDR_EXTRA_WIDTH]  -> group select
//   A "group" is one full-size RAM, one pair, one quartet or one octet of
//   RAMs. Inside a pair/quartet/octet the top 1/2/3 bits of the in-group
//   address select the individual RAM.
//
// RAM index layout on shared_ram_rden / shared_ram_rddata (LSB first):
//   [0                .. RAM_1_N-1]      full-size RAMs
//   [RAM_2_BASE + 2*g .. +1]             RAMs of pair    g
//   [RAM_4_BASE + 4*g .. +3]             RAMs of quartet g
//   [RAM_8_BASE + 8*g .. +7]             RAMs of octet   g
//
// Read transaction timing (per beat):
//   read enable is driven for one clock, the RAM read data is captured two
//   clocks after that, it passes through the registered multiplexer, and
//   rvalid is then asserted and held until rready is sampled high.
//   For bursts (arlen > 0) the RAM address is incremented by one per beat and
//   rlast is asserted together with the final beat.
//
// Limitation: the rounding look-up tables support at most 128 RAMs.
//-----------------------------------------------------------------------------
module axi_ram_backdoor
 #(
    //-------------------------------------------------------------------------
    // Constants
    parameter   AXI_ADDR_WIDTH            =    64,
    parameter   AXI_DATA_WIDTH            =   512,
    parameter   RAM_1_N                   =     1,  // #             RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX
    parameter   RAM_2_N                   =     0,  // # Pairs    of RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX - 1
    parameter   RAM_4_N                   =     0,  // # Quartets of RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX - 2
    parameter   RAM_8_N                   =     0,  // # Octets   of RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX - 3
    parameter   MEM_ADDR_WIDTH_MAX        =     8   // Max address width amongst the connected RAMs
    //-------------------------------------------------------------------------
  ) (
    //-------------------------------------------------------------------------
    // AXI4 Slave Interface
    //  . Clock and Resets
    // input  wire                                         en                    ,
    input  wire                                         arstn                 , //  Asynchronous Reset#
    input  wire                                         srstn                 , //  Synchronous  Reset#
    input  wire                                         clk                   ,
    //  . Read Address Channel
    input  wire [4                                -1:0] arid                  ,
    input  wire [AXI_ADDR_WIDTH                   -1:0] araddr                ,
    input  wire [4                                -1:0] arregion              , // NOT USED
    input  wire [8                                -1:0] arlen                 , // Number of beats minus one
    input  wire [3                                -1:0] arsize                , // NOT USED | Byte for transfer
    input  wire [2                                -1:0] arburst               , // NOT USED | RAM address always increments by one per beat
    input  wire                                         arlock                , // NOT USED | Locked Transaction
    input  wire [4                                -1:0] arcache               , // NOT USED
    input  wire [3                                -1:0] arprot                , // NOT USED
    input  wire [4                                -1:0] arqos                 , // NOT USED
    input  wire                                         arvalid               ,
    output wire                                         arready               ,
    //  . Read Data Channel
    output wire [4                                -1:0] rid                   ,
    output wire [AXI_DATA_WIDTH                   -1:0] rdata                 ,
    output wire [2                                -1:0] rresp                 , // Constant `AXI_xRESP_OKAY
    output wire                                         rlast                 ,
    output wire                                         rvalid                ,
    input  wire                                         rready                ,
    //-------------------------------------------------------------------------
    // RAM Ports
    output wire [(RAM_1_N + 2 * RAM_2_N + 4 * RAM_4_N + 8 * RAM_8_N)                    -1:0] shared_ram_rden       ,
    output wire [MEM_ADDR_WIDTH_MAX                                                     -1:0] shared_ram_rdaddr     ,
    input  wire [(RAM_1_N + 2 * RAM_2_N + 4 * RAM_4_N + 8 * RAM_8_N) * AXI_DATA_WIDTH   -1:0] shared_ram_rddata
    //-------------------------------------------------------------------------
  );

  //----------------------------------------------------------------------------
  // Local Declarations
  //----------------------------------------------------------------------------
  //  . Total number of RAMs and of address groups
  localparam N_TOT_RAM        = (RAM_1_N + 2 * RAM_2_N + 4 * RAM_4_N + 8 * RAM_8_N);
  localparam N_GROUPS         = (RAM_1_N +     RAM_2_N +     RAM_4_N +     RAM_8_N);

  //  . Number of RAMs rounded up to a power of 2 (minimum 2, maximum 128)
  localparam N_TOT_RAM_ROUND  = (N_TOT_RAM <= 2 ) ? 2  :
                                (N_TOT_RAM <= 4 ) ? 4  :
                                (N_TOT_RAM <= 8 ) ? 8  :
                                (N_TOT_RAM <= 16) ? 16 :
                                (N_TOT_RAM <= 32) ? 32 :
                                (N_TOT_RAM <= 64) ? 64 :
                                                    128;

  //  . Number of global address bits used to select a group
  localparam ADDR_EXTRA_WIDTH = (N_GROUPS <= 2 ) ? 1 :
                                (N_GROUPS <= 4 ) ? 2 :
                                (N_GROUPS <= 8 ) ? 3 :
                                (N_GROUPS <= 16) ? 4 :
                                (N_GROUPS <= 32) ? 5 :
                                (N_GROUPS <= 64) ? 6 :
                                                   7 ;

  //  . Number of hierarchical levels of 2x1 multiplexers = log2(N_TOT_RAM_ROUND)
  localparam MUX_WIDTH        = (N_TOT_RAM <= 2 ) ? 1 :
                                (N_TOT_RAM <= 4 ) ? 2 :
                                (N_TOT_RAM <= 8 ) ? 3 :
                                (N_TOT_RAM <= 16) ? 4 :
                                (N_TOT_RAM <= 32) ? 5 :
                                (N_TOT_RAM <= 64) ? 6 :
                                                    7 ;

  //  . First RAM index of each RAM class inside the enable / data vectors
  localparam RAM_2_BASE       = RAM_1_N;
  localparam RAM_4_BASE       = RAM_2_BASE + 2 * RAM_2_N;
  localparam RAM_8_BASE       = RAM_4_BASE + 4 * RAM_4_N;

  //  . First group number of each RAM class
  localparam GRP_2_BASE       = RAM_1_N;
  localparam GRP_4_BASE       = GRP_2_BASE + RAM_2_N;
  localparam GRP_8_BASE       = GRP_4_BASE + RAM_4_N;

  //  . Width (in bits) of the zero padding needed to reach N_TOT_RAM_ROUND lanes
  localparam N_PAD_RAM        = N_TOT_RAM_ROUND - N_TOT_RAM;

  //  . Read FSM states
  localparam [1:0] RD_ADDR_STATE      = 2'b00 ;
  localparam [1:0] RD_PIPE_DATA_STATE = 2'b01 ;
  localparam [1:0] RD_RESP_STATE      = 2'b10 ;

  // AXI4 Slave Read  Request Management (FSM)
  reg   [2                                      -1:0] rd_state_fsm        ;
  reg   [8                                      -1:0] rd_beats_left_r     ; // Beats still to be sent after the current one

  // AXI4 Read Channel
  //  . Read Address Channel
  reg                                                 arready_r           ;
  //  . Read Data Channel
  reg   [4                                      -1:0] rid_r               ;
  reg                                                 rlast_r             ;
  reg   [AXI_DATA_WIDTH                         -1:0] rdata_r             ;
  reg                                                 rvalid_r            ;

  // RAM Ports
  //  . Clocked signals
  reg   [N_TOT_RAM                              -1:0] shared_ram_rden_r   ; // One-clock read strobe sent to the RAMs
  reg   [N_TOT_RAM_ROUND                        -1:0] shared_ram_rden_rr  ; // Selected RAM, held for the whole transaction (mux select)
  reg   [N_TOT_RAM * AXI_DATA_WIDTH             -1:0] shared_ram_rddata_r ;
  reg   [MEM_ADDR_WIDTH_MAX                     -1:0] shared_ram_rdaddr_r ;
  //  . Combinational signals
  wire  [N_TOT_RAM                              -1:0] shared_ram_rden_c   ;
  wire  [N_TOT_RAM_ROUND                        -1:0] shared_ram_rden_pad_c ;
  wire  [N_TOT_RAM_ROUND*AXI_DATA_WIDTH         -1:0] shared_ram_rddata_c ;

  // Multiplexer wires: one AXI_DATA_WIDTH slot per 2x1 mux of the tree.
  // Level L (0 = closest to the RAMs) occupies slots
  //   [N_TOT_RAM_ROUND - (N_TOT_RAM_ROUND >> L)  +:  N_TOT_RAM_ROUND >> (L+1)]
  // so the root is slot N_TOT_RAM_ROUND - 2.
  wire  [(N_TOT_RAM_ROUND - 1 )*AXI_DATA_WIDTH  -1:0] mux_data            ;

  // Wait-cycle counter used while the RAM read is in flight
  reg   [2                                      -1:0] cnt                 ;

  // Group select field of the incoming read address
  wire  [ADDR_EXTRA_WIDTH                       -1:0] rd_group_sel = araddr[MEM_ADDR_WIDTH_MAX +: ADDR_EXTRA_WIDTH];


  //---------------------------------------------------------------------------
  // Combinational Logic to Compute RAMs' Read Enable Signals
  //---------------------------------------------------------------------------
  genvar g, s;

  generate
    // Enable for RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX
    for (g = 0; g < RAM_1_N; g = g + 1) begin : g_en_ram1
      assign shared_ram_rden_c[g] = (rd_group_sel == g);
    end

    // Enable for RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX - 1 (2 RAMs per group)
    for (g = 0; g < RAM_2_N; g = g + 1) begin : g_en_ram2
      for (s = 0; s < 2; s = s + 1) begin : g_sub
        assign shared_ram_rden_c[RAM_2_BASE + 2 * g + s] =
                 (rd_group_sel == (GRP_2_BASE + g)) && (araddr[MEM_ADDR_WIDTH_MAX - 1 +: 1] == s);
      end
    end

    // Enable for RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX - 2 (4 RAMs per group)
    for (g = 0; g < RAM_4_N; g = g + 1) begin : g_en_ram4
      for (s = 0; s < 4; s = s + 1) begin : g_sub
        assign shared_ram_rden_c[RAM_4_BASE + 4 * g + s] =
                 (rd_group_sel == (GRP_4_BASE + g)) && (araddr[MEM_ADDR_WIDTH_MAX - 2 +: 2] == s);
      end
    end

    // Enable for RAMs with ADDR WIDTH = MEM_ADDR_WIDTH_MAX - 3 (8 RAMs per group)
    for (g = 0; g < RAM_8_N; g = g + 1) begin : g_en_ram8
      for (s = 0; s < 8; s = s + 1) begin : g_sub
        assign shared_ram_rden_c[RAM_8_BASE + 8 * g + s] =
                 (rd_group_sel == (GRP_8_BASE + g)) && (araddr[MEM_ADDR_WIDTH_MAX - 3 +: 3] == s);
      end
    end
  endgenerate


  //---------------------------------------------------------------------------
  // Read Data Multiplexer
  // NOTE   For the moment it is full combinational but it can be pipelined
  //---------------------------------------------------------------------------
  genvar lvl, n;

  //  . Extend enables and read data to N_TOT_RAM_ROUND lanes by adding zeros.
  //    The padding is skipped when N_TOT_RAM is already a power of two so that
  //    no zero-width replication is ever elaborated.
  generate
    if (N_PAD_RAM > 0) begin : g_pad
      assign shared_ram_rden_pad_c = {{(N_PAD_RAM){1'b0}}, shared_ram_rden_c};
      assign shared_ram_rddata_c   = {{(N_PAD_RAM * AXI_DATA_WIDTH){1'b0}}, shared_ram_rddata_r};
    end
    else begin : g_nopad
      assign shared_ram_rden_pad_c = shared_ram_rden_c;
      assign shared_ram_rddata_c   = shared_ram_rddata_r;
    end
  endgenerate

  //  . Binary tree of 2x1 multiplexers.
  //    Node n of level lvl covers RAMs [n * 2^(lvl+1) +: 2^(lvl+1)]; it forwards
  //    its lower input when any enable of the lower half of that range is set,
  //    otherwise its upper input.
  generate
    for (lvl = 0; lvl < MUX_WIDTH; lvl = lvl + 1) begin : g_mux_level
      for (n = 0; n < (N_TOT_RAM_ROUND >> (lvl + 1)); n = n + 1) begin : g_mux_node
        wire sel_low = |shared_ram_rden_rr[n * (2 << lvl) +: (1 << lvl)];

        if (lvl == 0) begin : g_from_ram
          // First level: inputs are the (padded) RAM read data lanes
          assign mux_data[n * AXI_DATA_WIDTH +: AXI_DATA_WIDTH] =
                   sel_low ? shared_ram_rddata_c[(2 * n    ) * AXI_DATA_WIDTH +: AXI_DATA_WIDTH] :
                             shared_ram_rddata_c[(2 * n + 1) * AXI_DATA_WIDTH +: AXI_DATA_WIDTH] ;
        end
        else begin : g_from_mux
          // Upper levels: inputs are two adjacent nodes of the previous level
          assign mux_data[((N_TOT_RAM_ROUND - (N_TOT_RAM_ROUND >> lvl)) + n) * AXI_DATA_WIDTH +: AXI_DATA_WIDTH] =
                   sel_low ? mux_data[((N_TOT_RAM_ROUND - (N_TOT_RAM_ROUND >> (lvl - 1))) + 2 * n    ) * AXI_DATA_WIDTH +: AXI_DATA_WIDTH] :
                             mux_data[((N_TOT_RAM_ROUND - (N_TOT_RAM_ROUND >> (lvl - 1))) + 2 * n + 1) * AXI_DATA_WIDTH +: AXI_DATA_WIDTH] ;
        end
      end
    end
  endgenerate

  // Register that samples the MUX output (root of the tree) on every clock
  always @(negedge arstn or posedge clk) begin : rdata_reg
    if (!arstn) begin
      rdata_r <= {(AXI_DATA_WIDTH){1'b0}};
    end
    else if (!srstn) begin
      rdata_r <= {(AXI_DATA_WIDTH){1'b0}};
    end
    else begin
      rdata_r <= mux_data[((N_TOT_RAM_ROUND - 2) * AXI_DATA_WIDTH) +: AXI_DATA_WIDTH];
    end
  end


  //---------------------------------------------------------------------------
  // AXI4 Slave Read Request Management | FSM
  //---------------------------------------------------------------------------
  //  . Read Address Channel
  assign  arready           = arready_r ;
  //  . Read Data Channel
  assign  rvalid            = rvalid_r  ;
  assign  rid               = rid_r     ;
  assign  rlast             = rlast_r   ;
  assign  rdata             = rdata_r   ;
  assign  rresp             = `AXI_xRESP_OKAY;
  //  . RAM Ports
  assign  shared_ram_rden   = shared_ram_rden_r   ;
  assign  shared_ram_rdaddr = shared_ram_rdaddr_r ;


  always @(negedge arstn or posedge clk)
  begin: read_mgt
    if (arstn == 1'b0) begin
      // Asynchronous reset
      rd_state_fsm          <= RD_ADDR_STATE ;
      rd_beats_left_r       <= 8'd0 ;
      arready_r             <= 1'b0 ;
      rvalid_r              <= 1'b0 ;
      rid_r                 <= 4'b0 ;
      rlast_r               <= 1'b0 ;
      shared_ram_rden_rr    <= {(N_TOT_RAM_ROUND){1'b0}};
      shared_ram_rden_r     <= {(N_TOT_RAM){1'b0}};
      shared_ram_rddata_r   <= {(N_TOT_RAM * AXI_DATA_WIDTH){1'b0}};
      shared_ram_rdaddr_r   <= {(MEM_ADDR_WIDTH_MAX){1'b0}};
      cnt                   <= 2'd0 ;
    end
    else if (srstn == 1'b0) begin
      // Synchronous reset
      rd_state_fsm          <= RD_ADDR_STATE ;
      rd_beats_left_r       <= 8'd0 ;
      arready_r             <= 1'b0 ;
      rvalid_r              <= 1'b0 ;
      rid_r                 <= 4'b0 ;
      rlast_r               <= 1'b0 ;
      shared_ram_rden_rr    <= {(N_TOT_RAM_ROUND){1'b0}};
      shared_ram_rden_r     <= {(N_TOT_RAM){1'b0}};
      shared_ram_rddata_r   <= {(N_TOT_RAM * AXI_DATA_WIDTH){1'b0}};
      shared_ram_rdaddr_r   <= {(MEM_ADDR_WIDTH_MAX){1'b0}};
      cnt                   <= 2'd0 ;
    end
    else begin
      case (rd_state_fsm)
        //---------------------------------------------------------------------
        RD_ADDR_STATE : // Wait for a Read Request from AXI, then latch it
        //---------------------------------------------------------------------
          begin
            if (arvalid && arready_r) begin // Read Address is received
              arready_r           <= 1'b0 ;
              rid_r               <= arid ;

              shared_ram_rden_r   <= shared_ram_rden_c     ;            // Send Enables
              shared_ram_rden_rr  <= shared_ram_rden_pad_c ;            // Remember the selected RAM
              shared_ram_rdaddr_r <= araddr[MEM_ADDR_WIDTH_MAX -1:0] ;  // Send Address

              // arlen holds the number of beats minus one
              rd_beats_left_r     <= arlen ;
              rlast_r             <= (arlen == 8'd0) ;

              rd_state_fsm        <= RD_PIPE_DATA_STATE ;
            end
            else begin // Wait for valid read address to be received
              rd_state_fsm        <= RD_ADDR_STATE ;
              arready_r           <= 1'b1 ;
              rvalid_r            <= 1'b0 ;
              rlast_r             <= 1'b0 ;
            end
          end
        //---------------------------------------------------------------------
        RD_PIPE_DATA_STATE : // Wait for, then capture, the data read from RAM
        //---------------------------------------------------------------------
          begin
            rvalid_r          <= 1'b0 ;
            shared_ram_rden_r <= {(N_TOT_RAM){1'b0}} ; // Enable is a one-clock strobe

            if (cnt < 1) begin
              // First clock in this state: only wait
              cnt                 <= cnt + 2'd1 ;
              rd_state_fsm        <= RD_PIPE_DATA_STATE ;
            end
            else begin
              // Second clock in this state: capture the RAM outputs
              cnt                 <= 2'd0 ;
              shared_ram_rddata_r <= shared_ram_rddata ;
              rd_state_fsm        <= RD_RESP_STATE ;
            end
          end
        //---------------------------------------------------------------------
        RD_RESP_STATE : // Send the data to AXI
        //---------------------------------------------------------------------
          begin
            if (rvalid_r && rready) begin
              // Beat accepted by the master
              rvalid_r <= 1'b0 ;

              if (rlast_r) begin
                // Transaction complete: be ready for the next address at once
                rlast_r             <= 1'b0 ;
                arready_r           <= 1'b1 ;
                rd_state_fsm        <= RD_ADDR_STATE ;
              end
              else begin
                // Fetch the next beat from the same RAM at the next address.
                // rd_beats_left_r is >= 1 here because rlast_r is low.
                shared_ram_rden_r   <= shared_ram_rden_rr[N_TOT_RAM -1:0] ;
                shared_ram_rdaddr_r <= shared_ram_rdaddr_r + 1'b1 ;
                rd_beats_left_r     <= rd_beats_left_r - 8'd1 ;
                rlast_r             <= (rd_beats_left_r == 8'd1) ;
                rd_state_fsm        <= RD_PIPE_DATA_STATE ;
              end
            end
            else begin
              // Present the beat and hold it until rready is seen; rvalid must
              // not depend on rready. rdata_r is loaded from the multiplexer on
              // the same clock edge that first sets rvalid_r and then remains
              // stable because its sources do not change in this state.
              rvalid_r <= 1'b1 ;
            end
          end
        //---------------------------------------------------------------------
        default : // Unexpected state. Signals are reset
        //---------------------------------------------------------------------
          begin
            rd_state_fsm          <= RD_ADDR_STATE ;
            rd_beats_left_r       <= 8'd0 ;
            arready_r             <= 1'b0 ;
            rvalid_r              <= 1'b0 ;
            rid_r                 <= 4'b0 ;
            rlast_r               <= 1'b0 ;
            shared_ram_rden_r     <= {(N_TOT_RAM){1'b0}};
            shared_ram_rden_rr    <= {(N_TOT_RAM_ROUND){1'b0}};
            shared_ram_rddata_r   <= {(N_TOT_RAM * AXI_DATA_WIDTH){1'b0}};
            shared_ram_rdaddr_r   <= {(MEM_ADDR_WIDTH_MAX){1'b0}};
            cnt                   <= 2'd0 ;
          end
        //---------------------------------------------------------------------
      endcase
    end
  end

endmodule

`resetall