//
// Copyright (C) 2018 Ross Martin
//
// Versions
//
// 2019.02.08  Modified from dual_port_ram.v

/* verilator lint_off WIDTHTRUNC      */
/* verilator lint_off WIDTHEXPAND     */
/* verilator lint_off DECLFILENAME    */


// Simulation time unit and precision are both 1 ps.
`timescale 1ps / 1ps

// Drop any definitions of the shorthand macros that may already exist in this
// compilation unit, so the `define lines below start from a clean slate.
`undef w
`undef r
`undef ra
`undef rb

// Shorthand macros used by the modules in this file:
//   `w   - continuous assignment                 (expands to: assign)
//   `r   - block clocked on the rising edge of a signal named clk
//          (defined here but not referenced by the modules in this file)
//   `ra  - block clocked on the rising edge of clk_a (write side of the inner RAM)
//   `rb  - block clocked on the rising edge of clk_b (read side of the inner RAM)
// The macros are not undefined at the end of the file, so they remain visible to
// whatever is compiled afterwards in the same compilation unit.
`define w assign
`define r always@(posedge clk)
`define ra always@(posedge clk_a)
`define rb always@(posedge clk_b)

//
// A DONT_TOUCH attribute on this module appeared to cause a LUT1 buffer to be inserted in the
// resetn_r path, ruining the timing of the reset, so the attribute is left commented out.
//
//(* DONT_TOUCH = "YES" *)
//
// dual_port_ram_async_s
//
// Dual-clock RAM with a companion sync pipeline.  This module is purely structural:
//
//   * dpr_a_inner          - the RAM core (dual_port_ram_async_s_inner).  Port A is the write
//                            port and runs on clk_w; port B is the read port and runs on clk_r.
//   * sync_delay_instance  - a bxb_sync_delay_2 instance (defined outside this file) fed with
//                            in_sync_i and configured with DELAY; its output drives out_sync_o.
//
// Notes for maintainers:
//   * DELAY is only handed to the sync delay instance.  It is NOT forwarded to the RAM core,
//     whose read latency is fixed by its own register stages, so DELAY has to be kept in step
//     with the core by hand.
//   * ADDR_WIDTH and RAM_WIDTH are not forwarded to the RAM core either; the core derives its
//     own values from RAM_LENGTH, RAM_ITEM_WIDTH and RAM_ITEM_NUMBER.
//   * resetn_r is used by the sync delay instance only; the RAM core has no reset input.
//
module dual_port_ram_async_s
  #(
    // Width in bits of a single data item.
    parameter RAM_ITEM_WIDTH      = 72,
    // Number of items stored side by side in one RAM word.
    parameter RAM_ITEM_NUMBER     = 1,
    // Number of words in the RAM.
    parameter RAM_LENGTH          = 512,
    // Width of the read and write address ports.
    parameter ADDR_WIDTH          = $clog2(RAM_LENGTH),
    // Width in bits of one full RAM word (all items together).
    parameter RAM_WIDTH           = RAM_ITEM_WIDTH * RAM_ITEM_NUMBER,
    // Implementation selection.  UltraRAM is chosen once the total bit count exceeds
    // 120 * 32768 (more than 120 BRAMs worth); block RAM for 65 words or more;
    // distributed RAM only when neither of the other two is selected.
    parameter USE_ULTRA_RAM       = (RAM_LENGTH * RAM_WIDTH > 120 * 32768),
    parameter USE_BRAM            = (RAM_LENGTH >= 65),
    parameter USE_DISTRIBUTED_RAM = (!USE_ULTRA_RAM && !USE_BRAM),
    // Delay applied to the sync signals.  Formerly depended on the RAM type
    // (USE_ULTRA_RAM ? 5 : USE_BRAM ? 3 : 3); all implementations are now equalized to 5.
    parameter DELAY               = 5,
    // Number of sync bits carried alongside the read.
    parameter NUM_SYNC            = 1
   )
   (
    // ---- write side (clk_w) ----
    input                              clk_w,
    input                              write_enable_i,
    input [ADDR_WIDTH-1:0]             write_addr_i,
    input signed [RAM_ITEM_WIDTH-1:0]  write_data_i [0:RAM_ITEM_NUMBER-1],
    // ---- read side (clk_r) ----
    input                              clk_r,
    input                              resetn_r,
    input [ADDR_WIDTH-1:0]             read_addr_i,
    output signed [RAM_ITEM_WIDTH-1:0] read_data_ro [0:RAM_ITEM_NUMBER-1],
    // The sync is delayed by the same amount as the read data, so the timing of the
    // read result is known to whoever consumes out_sync_o.
    input [NUM_SYNC-1:0]               in_sync_i,
    output [NUM_SYNC-1:0]              out_sync_o
   );

   // Sync pipeline, clocked and reset on the read side.
   bxb_sync_delay_2
     #(
       .BIT_WIDTH ( NUM_SYNC ),
       .DELAY     ( DELAY    )
       )
   sync_delay_instance
     (
      .clk    ( clk_r      ),
      .resetn ( resetn_r   ),
      .in_i   ( in_sync_i  ),
      .out_o  ( out_sync_o )
      );

   // RAM core: port A = write side, port B = read side.
   dual_port_ram_async_s_inner
     #(
       .RAM_LENGTH          ( RAM_LENGTH          ),
       .RAM_ITEM_WIDTH      ( RAM_ITEM_WIDTH      ),
       .RAM_ITEM_NUMBER     ( RAM_ITEM_NUMBER     ),
       .USE_BRAM            ( USE_BRAM            ),
       .USE_DISTRIBUTED_RAM ( USE_DISTRIBUTED_RAM ),
       .USE_ULTRA_RAM       ( USE_ULTRA_RAM       )
       )
   dpr_a_inner
     (
      // write port
      .clk_a            ( clk_w          ),
      .a_write_enable_i ( write_enable_i ),
      .a_write_addr_i   ( write_addr_i   ),
      .a_write_data_i   ( write_data_i   ),
      // read port
      .clk_b            ( clk_r          ),
      .b_read_addr_i    ( read_addr_i    ),
      .b_read_data_ro   ( read_data_ro   )
      );

endmodule // dual_port_ram_async_s



//
// dual_port_ram_async_s_inner
//
// RAM core with one write port clocked by clk_a and one read port clocked by clk_b.
//
// Pipeline, as implemented below:
//   * Write side: enable, address and data are each registered twice (_r, _rr) on clk_a
//     before they reach the memory array, so a write presented on the a_* inputs is stored
//     on the 3rd rising clk_a edge.
//   * Read side: the address is registered twice (_r, _rr) on clk_b, the array read is
//     registered, and the read data passes through two further registers, so b_read_data_ro
//     changes on the 5th rising clk_b edge after an address is presented on b_read_addr_i.
//     This holds for every implementation branch.
//
// Implementation selection (first matching condition wins):
//   1. USE_ULTRA_RAM                 - one array, RAM_STYLE = "ultra"
//   2. USE_DISTRIBUTED_RAM           - one array, RAM_STYLE = "distributed"
//   3. USE_BRAM && BRAM_NUMBER == 1  - one array, RAM_STYLE = "block"
//   4. USE_BRAM                      - BRAM_NUMBER arrays of MAX_BRAM_LENGTH words each,
//                                      selected by the upper address bits
//   5. otherwise                     - instantiates error_module (no RAM is implemented)
// With the default parameter values (all USE_* = 0) branch 5 is taken.
//
// All memory arrays are cleared to zero by initial blocks.  There is no reset input.
//
// Parameters:
//   RAM_ITEM_WIDTH       width in bits of one data item
//   RAM_ITEM_NUMBER      number of items (unpacked array elements) per RAM word
//   RAM_WIDTH            width in bits of one RAM word (RAM_ITEM_WIDTH * RAM_ITEM_NUMBER)
//   RAM_LENGTH           number of words
//   ADDR_WIDTH           address width ($clog2(RAM_LENGTH))
//   USE_ULTRA_RAM / USE_DISTRIBUTED_RAM / USE_BRAM
//                        implementation selection, see the list above
//
(* DONT_TOUCH = "YES" *)
module dual_port_ram_async_s_inner
  #(
    parameter  RAM_ITEM_WIDTH       = 72,
    parameter  RAM_ITEM_NUMBER      = 1,
    parameter  RAM_WIDTH            = RAM_ITEM_WIDTH*RAM_ITEM_NUMBER,
    parameter  RAM_LENGTH           = 512,
    parameter  ADDR_WIDTH           = $clog2(RAM_LENGTH),
    parameter  USE_ULTRA_RAM        = 0,
    parameter  USE_DISTRIBUTED_RAM  = 0,
    parameter  USE_BRAM             = 0
   )
   (
    input                              clk_a,             // write-side clock
    input                              clk_b,             // read-side clock
    input                              a_write_enable_i,  // write strobe, sampled on clk_a
    input [ADDR_WIDTH-1:0]             a_write_addr_i,    // write address, sampled on clk_a
    input signed [RAM_ITEM_WIDTH-1:0]  a_write_data_i [0:RAM_ITEM_NUMBER-1], // write data, one element per item
    input [ADDR_WIDTH-1:0]             b_read_addr_i,     // read address, sampled on clk_b
    output signed [RAM_ITEM_WIDTH-1:0] b_read_data_ro [0:RAM_ITEM_NUMBER-1]  // registered read data, one element per item
   );

   // Input pipeline registers (two stages per signal: _r then _rr) and the read-data
   // registers.  The max_fanout attribute is a synthesis fanout limit on the input stages.
   // b_read_data_r and b_read_data_rr are declared here at module level but are assigned
   // from inside whichever generate branch is selected further down.
   (* max_fanout = 20 *) reg                         a_write_enable_r  = 0;
   (* max_fanout = 20 *) reg                         a_write_enable_rr = 0;
   (* max_fanout = 20 *) reg [ADDR_WIDTH-1:0]        a_write_addr_r    = 0;
   (* max_fanout = 20 *) reg [ADDR_WIDTH-1:0]        a_write_addr_rr   = 0;
   (* max_fanout = 20 *) reg [RAM_WIDTH-1:0]         a_write_data_r    = 0;
   (* max_fanout = 20 *) reg [RAM_WIDTH-1:0]         a_write_data_rr   = 0;
   (* max_fanout = 20 *) reg [ADDR_WIDTH-1:0]        b_read_addr_r     = 0;
   (* max_fanout = 20 *) reg [ADDR_WIDTH-1:0]        b_read_addr_rr     = 0;
   reg [RAM_WIDTH-1:0]                               b_read_data_r     = 0;
   reg [RAM_WIDTH-1:0]                               b_read_data_rr    = 0;

   // Two-stage input pipelines: write enable and write address on clk_a,
   // read address on clk_b.
   `ra a_write_enable_r   <=  a_write_enable_i;
   `ra a_write_enable_rr  <=  a_write_enable_r;
   `ra a_write_addr_r     <=  a_write_addr_i;
   `ra a_write_addr_rr    <=  a_write_addr_r;
   `rb b_read_addr_r      <=  b_read_addr_i;
   `rb b_read_addr_rr     <=  b_read_addr_r;

   genvar                                            bram;  // index over the arrays of the multi-BRAM branch
   genvar                                            item;  // index over the items of one RAM word

generate

   // First write-data stage: pack the unpacked item array into one flat word.
   // Item i occupies bits [i*RAM_ITEM_WIDTH +: RAM_ITEM_WIDTH].
   for(item=0; item<RAM_ITEM_NUMBER; item = item + 1)
     `ra a_write_data_r[item*RAM_ITEM_WIDTH +: RAM_ITEM_WIDTH] <=  a_write_data_i[item];

   // Second write-data stage.
   `ra a_write_data_rr <= a_write_data_r;


   // Geometry of the multi-BRAM branch.  The address is split by shifting and truncating,
   // which is why MAX_BRAM_LENGTH has to be a power of 2.
   //   MAX_BRAM_LENGTH   depth of each individual array
   //   BRAM_BITS         address bits used inside one array
   //   BRAM_NUMBER       number of arrays, RAM_LENGTH / MAX_BRAM_LENGTH rounded up
   //   BRAM_NUMBER_BITS  address bits used to select an array
   localparam MAX_BRAM_LENGTH  = 32768;  // MUST be a power of 2
   localparam BRAM_BITS        = $clog2(MAX_BRAM_LENGTH);
   localparam BRAM_NUMBER      = (RAM_LENGTH + MAX_BRAM_LENGTH - 1) / MAX_BRAM_LENGTH;
   localparam BRAM_NUMBER_BITS = $clog2(BRAM_NUMBER);


   if ( USE_ULTRA_RAM )
     begin : gen_blk_use_ultra_ram
	// Single array, tagged for UltraRAM through the RAM_STYLE synthesis attribute.
	// NOTE(review): CASCADE_HEIGHT = 0 presumably limits/disables cascading - confirm
	// against the synthesis tool documentation.
	(* RAM_STYLE = "ultra" *)
	(* CASCADE_HEIGHT = 0 *)
	reg [RAM_WIDTH-1:0] 	   ram_t[0:RAM_LENGTH-1];

	// Clear the whole array to zero at time 0.
	initial
	  begin
	     integer ii;
	     for(ii=0; ii<RAM_LENGTH; ii=ii+1)
	       ram_t[ii] = 0;
	  end

	// Write port (clk_a), driven by the second-stage write registers.
	`ra if(a_write_enable_rr) ram_t[a_write_addr_rr] <= a_write_data_rr;

	// Read port (clk_b): registered read using the second-stage read address.
	`rb b_read_data_r <= ram_t[b_read_addr_rr];

        reg [RAM_WIDTH-1:0]        b_read_data_rx = 0;

	// Two further output registers after the array read.
	`rb b_read_data_rx  <= b_read_data_r;
	`rb b_read_data_rr  <= b_read_data_rx;

     end
   else if ( USE_DISTRIBUTED_RAM )
     begin : gen_blk_use_distributed_ram
	// Single array, tagged for distributed RAM through the RAM_STYLE synthesis attribute.
	(* RAM_STYLE = "distributed" *)
	reg [RAM_WIDTH-1:0] 	   ram_t[0:RAM_LENGTH-1];

	// Clear the whole array to zero at time 0.
	initial
	  begin
	     integer ii;
	     for(ii=0; ii<RAM_LENGTH; ii=ii+1)
	       ram_t[ii] = 0;
	  end

	// Write port (clk_a), driven by the second-stage write registers.
	`ra if(a_write_enable_rr) ram_t[a_write_addr_rr] <= a_write_data_rr;

	// Read port (clk_b): registered read using the second-stage read address.
	`rb b_read_data_r <= ram_t[b_read_addr_rr];

        reg [RAM_WIDTH-1:0] b_read_data_rx = 0;

	// Two further output registers, giving the same read latency as the other branches.
	`rb b_read_data_rx  <= b_read_data_r;
	`rb b_read_data_rr  <= b_read_data_rx;

     end
   else if ( USE_BRAM && BRAM_NUMBER==1 )
     begin : gen_blk_single_bram
	// Whole RAM fits in one array of at most MAX_BRAM_LENGTH words, tagged for block RAM.
	(* RAM_STYLE = "block" *) (* CASCADE_HEIGHT = 0 *)
	reg [RAM_WIDTH-1:0] 	   ram_t[0:RAM_LENGTH-1];


	// Clear the whole array to zero at time 0.
	initial
	  begin
	     integer ii;
	     for(ii=0; ii<RAM_LENGTH; ii=ii+1)
	       ram_t[ii] = 0;
	  end

	// Write port (clk_a), driven by the second-stage write registers.
	`ra if(a_write_enable_rr) ram_t[a_write_addr_rr] <= a_write_data_rr;

	// Read port (clk_b): registered read using the second-stage read address.
	`rb b_read_data_r <= ram_t[b_read_addr_rr];

        reg [RAM_WIDTH-1:0] b_read_data_rx = 0;

	// Two further output registers after the array read.
	`rb b_read_data_rx  <= b_read_data_r;
	`rb b_read_data_rr  <= b_read_data_rx;

     end
   else if ( USE_BRAM )
     begin : gen_blk_use_bram

        // RAM_LENGTH exceeds MAX_BRAM_LENGTH: the RAM is built from BRAM_NUMBER separate
        // arrays.  The low BRAM_BITS address bits index within an array and the remaining
        // upper bits select the array.
        wire [BRAM_BITS-1:0]         single_bram_awaddr_rr;
        wire [BRAM_NUMBER_BITS-1:0]  cross_bram_awaddr_r;
        wire [BRAM_BITS-1:0]         single_bram_braddr_rr;
        reg [BRAM_NUMBER_BITS-1:0]   cross_bram_braddr_rrr    = 0;
        reg [BRAM_NUMBER_BITS-1:0]   cross_bram_braddr_rrrr   = 0;

        // Write side.  The in-array address is the second-stage address truncated to
        // BRAM_BITS by the assignment; the array select comes from the FIRST-stage address
        // so the per-array enable below can be registered and still line up with
        // a_write_addr_rr / a_write_data_rr.
        `w  single_bram_awaddr_rr   = a_write_addr_rr;
        `w  cross_bram_awaddr_r     = a_write_addr_r >> BRAM_BITS;

        // Read side.  The in-array address is the truncated second-stage address.  The array
        // select is delayed by two registers so that it arrives together with the per-array
        // read data (b_read_data_rw) at the output multiplexer.
        `w  single_bram_braddr_rr   = b_read_addr_rr;
        `rb cross_bram_braddr_rrr  <= b_read_addr_rr >> BRAM_BITS;
        `rb cross_bram_braddr_rrrr <= cross_bram_braddr_rrr;

        // Second-stage read data of every array, collected for the output multiplexer.
        wire [RAM_WIDTH-1:0]         b_read_data_rx_w[0:BRAM_NUMBER-1];

        for(bram=0; bram<BRAM_NUMBER; bram = bram + 1)
          begin

             // Per-array write enable: asserted when a write is in flight and its upper
             // address bits select this array.
             reg a_single_bram_write_enable_rr = 0;
             wire correct_write_bram_w;

             `w correct_write_bram_w = (cross_bram_awaddr_r==bram);
             `ra a_single_bram_write_enable_rr <= (a_write_enable_r && correct_write_bram_w);

	     (* RAM_STYLE = "block" *) (* CASCADE_HEIGHT = 0 *)
	     reg [RAM_WIDTH-1:0] 	   ram_t[0:MAX_BRAM_LENGTH-1];
             reg [RAM_WIDTH-1:0]           b_read_data_rv = 0;
             reg [RAM_WIDTH-1:0]           b_read_data_rw = 0;

	     // Clear this array to zero at time 0.
	     initial
	       begin
	          integer ii;
	          for(ii=0; ii<MAX_BRAM_LENGTH; ii=ii+1)
	            ram_t[ii] = 0;
	       end

	     // Write port of this array (clk_a).
	     `ra if(a_single_bram_write_enable_rr) ram_t[single_bram_awaddr_rr] <= a_write_data_rr;

             //`ra #2 if(a_single_bram_write_enable_rr) $display("Write to bram %d addr 0x%04X of 0x%08X\n", bram, single_bram_awaddr_rr, a_write_data_rr);

	     // Read port of this array (clk_b).  Every array is read on every cycle; the
	     // selection between arrays happens after the two per-array registers.
	     `rb b_read_data_rv <= ram_t[single_bram_braddr_rr];

             //`rb #4 $display("      bram %d addr 0x%04x read value 0x%08X", bram, single_bram_braddr_rr, b_read_data_r);


	     `rb b_read_data_rw  <= b_read_data_rv;

             `w b_read_data_rx_w[bram] = b_read_data_rw;

          end // for (bram=0; bram<BRAM_NUMBER; bram++)

	// Output multiplexer and final output register: pick the array selected by the
	// delayed upper read-address bits.
	`rb b_read_data_rr  <= b_read_data_rx_w[cross_bram_braddr_rrrr];


        // For testing begin
        // (Commented-out reference model: a single flat array written and read with the same
        // pipeline, used to compare against the multi-array result.)
	//reg [RAM_WIDTH-1:0] 	   xxram_t[0:RAM_LENGTH-1];
	//reg [RAM_WIDTH-1:0] 	   xxb_read_data_r;
	//reg [RAM_WIDTH-1:0] 	   xxb_read_data_rr;

	//initial
	//  begin
	//     integer ii;
	//     for(ii=0; ii<RAM_LENGTH; ii=ii+1)
	//       xxram_t[ii] = 0;
	//  end

	//`ra if(a_write_enable_rr) xxram_t[a_write_addr_rr] <= a_write_data_rr;

        //`ra #3 if(a_write_enable_rr) $display("Should be Write to 0x%04X of 0x%08X\n", a_write_addr_rr, a_write_data_rr);

	//`rb xxb_read_data_r <= xxram_t[b_read_addr_rr];

        //reg [RAM_WIDTH-1:0] xxb_read_data_rx = 0;

	//`rb xxb_read_data_rx  <= xxb_read_data_r;
	//`rb xxb_read_data_rr  <= xxb_read_data_rx;
        // For testing end

        //`rb #5 $display("    read bram %d addr 0x%04X of 0x%08X should be 0x%08X", cross_bram_braddr_rrrr, single_bram_braddr_rr, b_read_data_rr, xxb_read_data_rr);


     end




   else
     begin : gen_blk_no_implementation
        // No RAM implementation was selected.
        // NOTE(review): error_module is not defined in this file and `clk` is not a port or
        // declared signal of this module; this instantiation looks like a deliberate way to
        // make the build fail when none of the USE_* parameters is set - confirm.
        error_module error(clk);  // Need to define a RAM implementation.  DELAY must be set in outer module to match.
     end // else: !if( USE_BRAM )

   // Unpack the flat output register back into the per-item output array
   // (inverse of the packing done on the write data above).
   for(item=0; item<RAM_ITEM_NUMBER; item = item + 1)
     `w b_read_data_ro[item] = b_read_data_rr[item*RAM_ITEM_WIDTH +: RAM_ITEM_WIDTH];

endgenerate


endmodule // dual_port_ram_async_s_inner

/* verilator lint_on WIDTHTRUNC      */
/* verilator lint_on WIDTHEXPAND     */
/* verilator lint_on DECLFILENAME    */
