// SPDX-License-Identifier: CC-BY-NC-SA-4.0
//
// Copyright (C) 2025 Bit by Bit Signal Processing LLC  (https://bxbsp.com)
//
// This work is placed under the "Creative Commons Attribution
// NonCommercial ShareAlike 4.0 International" license, known
// by the shortened acronym "CC-BY-NC-SA-4.0".
//
// This work is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// A CC-BY-NC-SA-4.0 license allows you to use, distribute, and modify
// this work, so long as such uses are non-commercial in nature,
// so long as any derived works are offered on the same terms,
// and so long as attribution is given to the original author.
// For further details, see the Creative Commons License
// "CC-BY-NC-SA-4.0".
//
// You should have received a copy of the CC-BY-NC-SA-4.0 license
// along with this work. If not, see
// <https://creativecommons.org/licenses/by-nc-sa/4.0/>.
//

//`timescale 1ns / 1ps

// Shorthand macros used throughout this file.
//
// "w" expands to the continuous-assignment keyword.
`define w assign
// "raxis" opens a clocked process on the rising edge of IN_AXIS_aclk
// (the streaming / capture clock domain).
`define raxis always@(posedge IN_AXIS_aclk)
// "raxi" opens a clocked process on the rising edge of S_AXI_ACLK
// (the memory-mapped readback clock domain).
`define raxi  always@(posedge S_AXI_ACLK)

//
// Works for non-power-of-2 samples per clock by rounding up to the next higher power of 2.  This
// leaves gaps in the addressing: each block of AXI_SAMPLES_PER_MEMADDR samples has its first
// AXIS_SAMPLES_PER_CLOCK samples valid, and the rest is zero padding.
//
// Only works if AXI_SAMPLES_PER_WORD evenly divides into AXI_SAMPLES_PER_MEMADDR.
//
// real_sample_capture
//
// Taps an AXI-Stream of real samples, registers it through to PASS_AXIS, and
// while "collect" is high writes the top AXIS_ACTUAL_BITS_PER_SAMPLE bits of
// every sample into a rotating buffer (capture_ram) of BRAM_LENGTH entries.
// Once collection has stopped, the buffer can be read back over the S_AXI
// memory-mapped slave port; each stored sample is returned left-justified in
// an AXI_BITS_PER_SAMPLE-wide field.  Writes on S_AXI are accepted and
// discarded, but always acknowledged with an OKAY response.
//
// Notes for integrators:
//   * IN_AXIS_tready is tied high and PASS_AXIS_tready is not used, so this
//     block never applies back-pressure.
//   * Capture and the TLAST stop condition are not qualified by
//     IN_AXIS_tvalid in this module.
//   * capture_position is sampled into the S_AXI_ACLK domain without a
//     synchronizer; this relies on collection being stopped (so the value is
//     static) whenever software reads the buffer.
//
module real_sample_capture
  #(
    // Capture parameters
    parameter BRAM_LENGTH                  = 4096,

    parameter AXIS_SAMPLES_PER_CLOCK       = 8,
    parameter AXIS_BITS_PER_SAMPLE         = 16,
    parameter AXIS_ACTUAL_BITS_PER_SAMPLE  = 12,
    parameter AXIS_SAMPLES_TO_CAPTURE      = BRAM_LENGTH * AXIS_SAMPLES_PER_CLOCK,
    parameter AXIS_BUS_WIDTH               = AXIS_BITS_PER_SAMPLE * AXIS_SAMPLES_PER_CLOCK,
    parameter SYNC_CAPTURE_WITH_TLAST      = 0,
    
    // ARM interface parameters
    parameter AXI_BITS_PER_WORD            = 32,
    parameter AXI_BYTES_PER_WORD           = AXI_BITS_PER_WORD/8,
    parameter AXI_BITS_PER_SAMPLE          = 16,
    parameter AXI_BYTES_PER_SAMPLE         = (AXI_BITS_PER_SAMPLE+7)/8,
    parameter AXI_SAMPLES_PER_WORD         = AXI_BITS_PER_WORD / AXI_BITS_PER_SAMPLE,
    parameter AXI_SAMPLES_PER_MEMADDR      = 1<<$clog2(AXIS_SAMPLES_PER_CLOCK),  // round up to next power of 2
    parameter AXI_WORDS_PER_MEMADDR        = AXI_SAMPLES_PER_MEMADDR / AXI_SAMPLES_PER_WORD,
    parameter AXI_BYTE_ADDR_WIDTH          = $clog2( BRAM_LENGTH * AXI_SAMPLES_PER_MEMADDR * AXI_BYTES_PER_SAMPLE )
    )
   (
    // Data collection and passthrough

    (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 IN_AXIS_aclk CLK" *)
    (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF IN_AXIS:PASS_AXIS, ASSOCIATED_RESET IN_AXIS_aresetn" *)
    input                                IN_AXIS_aclk,

    (* X_INTERFACE_INFO = "xilinx.com:signal:reset:1.0 IN_AXIS_aresetn RST" *)
    input                                IN_AXIS_aresetn,

    input [AXIS_BUS_WIDTH-1:0]           IN_AXIS_tdata,
    output wire                          IN_AXIS_tready,
    input                                IN_AXIS_tvalid,
    input                                IN_AXIS_tlast,

    output reg [AXIS_BUS_WIDTH-1:0]      PASS_AXIS_tdata,
    input                                PASS_AXIS_tready,
    output reg                           PASS_AXIS_tvalid,
    output reg                           PASS_AXIS_tlast,

    // Data is continuously collected while collect is high.
    // Bringing collect low stops collection.  Collection
    // stops immediately if SYNC_CAPTURE_WITH_TLAST is 0.
    // Collection stops at the next IN_AXIS_tlast pulse if
    // SYNC_CAPTURE_WITH_TLAST is 1.  Once capture is
    // stopped, the captured data can be read at leisure
    // from software on the ARM.
    input                                collect,

    // collect_resetn is a signal that is zero when not
    // collecting or when resetn is low.  It is for
    // holding in reset things that drive collection
    // so that they start out initialized when
    // collection begins.
    output reg                           collect_resetn,
    
    //
    // This is the interface to the ARM processor
    //
    input                                S_AXI_ACLK,
    input                                S_AXI_ARESETN,
    input [AXI_BYTE_ADDR_WIDTH-1 : 0]    S_AXI_AWADDR,
    input [2 : 0]                        S_AXI_AWPROT,
    input                                S_AXI_AWVALID,
    output reg                           S_AXI_AWREADY,
    input [AXI_BITS_PER_WORD-1 : 0]      S_AXI_WDATA,
    input [AXI_BYTES_PER_WORD-1 : 0]     S_AXI_WSTRB,
    input                                S_AXI_WVALID,
    output reg                           S_AXI_WREADY,
    output reg [1 : 0]                   S_AXI_BRESP,
    output reg                           S_AXI_BVALID,
    input                                S_AXI_BREADY,
    input [AXI_BYTE_ADDR_WIDTH-1 : 0]    S_AXI_ARADDR,
    input [2 : 0]                        S_AXI_ARPROT,
    input                                S_AXI_ARVALID,
    output reg                           S_AXI_ARREADY,
    output reg [AXI_BITS_PER_WORD-1 : 0] S_AXI_RDATA,
    output reg [1 : 0]                   S_AXI_RRESP,
    output reg                           S_AXI_RVALID,
    input                                S_AXI_RREADY

);

   localparam BRAM_LENGTH_BITS             = $clog2(BRAM_LENGTH);

   genvar                                  i, j;

   // Registered pass-through of the incoming stream (one clock of latency).
   `raxis PASS_AXIS_tdata  <= IN_AXIS_tdata;
   `raxis PASS_AXIS_tvalid <= IN_AXIS_tvalid;
   `raxis PASS_AXIS_tlast  <= IN_AXIS_tlast;
   
   // The upstream source is never stalled.
   `w IN_AXIS_tready = 1'b1;
   
   // Address bits are broken down into sections.  The first (lowest) section is
   // ADDR_BITS_BYTES_PER_SAMPLE.  These bits address bytes within a sample.  There
   // are currently 2 bytes per 16-bit sample, so ADDR_BITS_BYTES_PER_SAMPLE
   // is $clog2(2) = 1.  Take off this 1 bit, and you're addressing samples.
   //
   // The next section is between samples within an AXI word.  The AXI word is currently 32 bits,
   // so there is 1 more bit between the 16-bit sample and the 32-bit word.  So
   // ADDR_BITS_SAMPLES_PER_WORD is 1.
   //
   // Then there are multiple 32-bit AXI words per memaddr.  Each memaddr presents
   // AXI_SAMPLES_PER_MEMADDR sample slots (AXIS_SAMPLES_PER_CLOCK rounded up to a
   // power of 2), so the number of 32-bit words is
   // AXI_SAMPLES_PER_MEMADDR / AXI_SAMPLES_PER_WORD, which with the defaults
   // is 8/2 = 4.  It's calculated as AXI_WORDS_PER_MEMADDR.  The number
   // of bits is then 2, which is ADDR_BITS_WORDS_PER_MEMADDR.
   //
   // Above that we have bits corresponding to the address in BRAM memory.
   // This is ADDR_BITS_MEMADDR, which is the same as BRAM_LENGTH_BITS.
   // This is determined by how long a capture capability is required.
   // It's a rotating buffer, so for ease of rotation it must also be
   // a power of 2.
   //
   localparam ADDR_BITS_BYTES_PER_SAMPLE    = $clog2(AXI_BYTES_PER_SAMPLE);
   localparam ADDR_BITS_SAMPLES_PER_WORD    = $clog2(AXI_SAMPLES_PER_WORD);
   localparam ADDR_BITS_WORDS_PER_MEMADDR   = $clog2(AXI_WORDS_PER_MEMADDR);
   localparam ADDR_BITS_MEMADDR             = BRAM_LENGTH_BITS;

   localparam ADDR_BITS_BYTES   = ADDR_BITS_MEMADDR + ADDR_BITS_WORDS_PER_MEMADDR + ADDR_BITS_SAMPLES_PER_WORD + ADDR_BITS_BYTES_PER_SAMPLE;
   localparam ADDR_BITS_SAMPLES = ADDR_BITS_MEMADDR + ADDR_BITS_WORDS_PER_MEMADDR + ADDR_BITS_SAMPLES_PER_WORD;
   localparam ADDR_BITS_WORDS   = ADDR_BITS_MEMADDR + ADDR_BITS_WORDS_PER_MEMADDR;


   reg                                     collect_r;   
   reg [BRAM_LENGTH_BITS-1:0]              capture_position = 0;
   (* FALSE_PATH_DEST = 1 *) reg [BRAM_LENGTH_BITS-1:0]              capture_position_rb = 0;
   wire [AXIS_ACTUAL_BITS_PER_SAMPLE-1:0]  write_data[0:AXIS_SAMPLES_PER_CLOCK-1];
   reg                                     read_enable_in;
   wire                                    read_enable_out;
   wire [ADDR_BITS_BYTES-1:0]              read_address_byte;
   wire [ADDR_BITS_SAMPLES-1:0]            read_address_sample;
   wire [ADDR_BITS_WORDS-1:0]              read_address_word;
   reg [ADDR_BITS_WORDS_PER_MEMADDR-1:0]   read_address_words_per_memaddr;
   reg [ADDR_BITS_MEMADDR-1:0]             read_address_memaddr;
   wire [AXIS_ACTUAL_BITS_PER_SAMPLE-1:0]  read_data[0:AXIS_SAMPLES_PER_CLOCK-1];
   wire                                    read_in_progress;
   wire [AXI_BITS_PER_WORD-1:0]            axi_read_data;
   

   // Registered collect enable.  When SYNC_CAPTURE_WITH_TLAST is nonzero the
   // enable stays up after "collect" drops until a clock on which
   // IN_AXIS_tlast is high; otherwise it simply follows "collect".
   `raxis collect_r <= collect || (collect_r && !IN_AXIS_tlast && SYNC_CAPTURE_WITH_TLAST);

   `raxis collect_resetn <= collect_r && IN_AXIS_aresetn;
   
   // Rotating buffer address.  Data should only be read from the memories
   // when collection is disabled, so this is also a constant during read clocks
   // Buffer length is supposed to be a power of 2, so can let the address roll
   // over naturally.
   `raxis capture_position <= !IN_AXIS_aresetn                             ?  0                     :
                              collect_r                                    ?  capture_position + 1  :
                              /* default */                                   capture_position      ;
   
   // Unsynchronized copy in the S_AXI_ACLK domain (see FALSE_PATH_DEST above);
   // valid only while capture_position is static.
   `raxi capture_position_rb <= capture_position;  // For use on the b side



   // Slice the stream word into samples and keep only the most significant
   // AXIS_ACTUAL_BITS_PER_SAMPLE bits of each for storage.
   generate
      for(i=0; i<AXIS_SAMPLES_PER_CLOCK; i=i+1)
        begin
           wire [AXIS_BITS_PER_SAMPLE-1:0] in_sample;
           `w in_sample = IN_AXIS_tdata[ i*AXIS_BITS_PER_SAMPLE +: AXIS_BITS_PER_SAMPLE ];
           `w write_data[i] = in_sample[AXIS_BITS_PER_SAMPLE-1 -: AXIS_ACTUAL_BITS_PER_SAMPLE ];
        end
   endgenerate

       
   // Capture memory: written in the IN_AXIS_aclk domain, read in the
   // S_AXI_ACLK domain.  capture_ram is defined outside this file.
   // NOTE(review): out_sync_o is presumably in_sync_i delayed by the RAM's
   // read latency, marking when read_data_ro is valid -- confirm against
   // capture_ram.
   capture_ram
     #(
       .RAM_ITEM_WIDTH   ( AXIS_ACTUAL_BITS_PER_SAMPLE   ), 
       .RAM_ITEM_NUMBER  ( AXIS_SAMPLES_PER_CLOCK        ), 
       .RAM_LENGTH       ( BRAM_LENGTH                   )
       )
   bram
     (
      .reset_write       ( !IN_AXIS_aresetn              ),
      .clk_write         ( IN_AXIS_aclk                  ),
      .write_enable_i    ( collect_r                     ),
      .write_addr_i      ( capture_position              ),
      .write_data_i      ( write_data                    ),
      .reset_read        ( !S_AXI_ARESETN                ),
      .clk_read          ( S_AXI_ACLK                    ),
      .read_addr_i       ( read_address_memaddr          ),
      .read_data_ro      ( read_data                     ),
      .in_sync_i         ( read_enable_in                ),
      .out_sync_o        ( read_enable_out               )
      );

   
   // Peel the byte address down to a sample address and then a word address.
   `w  read_address_sample               = read_address_byte   >> ADDR_BITS_BYTES_PER_SAMPLE;
   `w  read_address_word                 = read_address_sample >> ADDR_BITS_SAMPLES_PER_WORD;

   // Latch the RAM row on the read-address handshake.  The row is offset by
   // capture_position_rb (the slot that would be written next), so AXI
   // address 0 maps to that slot; the sum wraps to ADDR_BITS_MEMADDR bits.
   `raxi read_address_memaddr           <= (S_AXI_ARREADY && S_AXI_ARVALID) ? (read_address_word>>ADDR_BITS_WORDS_PER_MEMADDR) + capture_position_rb :
                                           /* default */                      read_address_memaddr                                                   ;
   
   // Latch which AXI word within the row was requested.
   `raxi read_address_words_per_memaddr <= (S_AXI_ARREADY && S_AXI_ARVALID) ? read_address_word[ADDR_BITS_WORDS_PER_MEMADDR-1:0]  :
                                           /* default */                      read_address_words_per_memaddr                      ;   
   
   //
   // Rearrange the bits so that the final selection of words looks a little like a memory
   // access.  This will turn into a mux when synthesized.  axi_samples is the array
   // that will be indexed to form the mux.  Need to wire it up to the correct bits of
   // the word read from memory, read_data.
   //

   wire [AXI_BITS_PER_WORD-1:0]                   axi_samples[0:AXI_WORDS_PER_MEMADDR-1];

   generate
      for(i=0; i<AXI_WORDS_PER_MEMADDR; i=i+1)
        begin
           wire [AXI_BITS_PER_WORD-1:0] axi_word;
           
           for(j=0; j<AXI_SAMPLES_PER_WORD; j=j+1)
             begin
                wire [AXIS_ACTUAL_BITS_PER_SAMPLE-1:0] axis_sample;
                wire [AXI_BITS_PER_SAMPLE-1:0]         axis_sample_extended;
                wire [AXI_BITS_PER_SAMPLE-1:0]         axi_sample;
                
                // Lanes at or beyond AXIS_SAMPLES_PER_CLOCK exist only because of
                // the round-up to a power of 2; they read back as zero.  Selecting
                // at elaboration time means read_data is never indexed out of range.
                if (j+i*AXI_SAMPLES_PER_WORD < AXIS_SAMPLES_PER_CLOCK)
                  begin
                     `w axis_sample = read_data[j+i*AXI_SAMPLES_PER_WORD];
                  end
                else
                  begin
                     `w axis_sample = {AXIS_ACTUAL_BITS_PER_SAMPLE{1'b0}};
                  end

                // Zero-extend, then left-justify within the AXI sample field.
                `w axis_sample_extended = axis_sample;
                `w axi_sample           = axis_sample_extended << (AXI_BITS_PER_SAMPLE-AXIS_ACTUAL_BITS_PER_SAMPLE);
                
                `w axi_word[AXI_BITS_PER_SAMPLE*j +: AXI_BITS_PER_SAMPLE] = axi_sample;
             end // for (j=0; j<AXI_SAMPLES_PER_WORD; j=j+1)
           
           `w axi_samples[i] = axi_word;
        end // for (i=0; i<AXI_WORDS_PER_MEMADDR; i=i+1)
      
   endgenerate

   `w axi_read_data = axi_samples[read_address_words_per_memaddr];




   //
   // Write channels.  Nothing in this block is writable, so write data is
   // discarded, but every write must still receive exactly one response.
   //
   // The address and data channels may handshake on different clocks, so each
   // ready is dropped once its channel has been accepted and both are raised
   // again when the response is taken.  A low ready therefore doubles as
   // "this half of the write has already been received".  The response is
   // issued as soon as both halves have been received, in either order.
   //
   wire write_addr_accept;
   wire write_data_accept;
   wire write_resp_accept;
   wire write_addr_held;
   wire write_data_held;

   `w write_addr_accept = S_AXI_AWREADY && S_AXI_AWVALID;
   `w write_data_accept = S_AXI_WREADY  && S_AXI_WVALID;
   `w write_resp_accept = S_AXI_BREADY  && S_AXI_BVALID;
   `w write_addr_held   = !S_AXI_AWREADY || write_addr_accept;
   `w write_data_held   = !S_AXI_WREADY  || write_data_accept;

   `raxi S_AXI_AWREADY <= (!S_AXI_ARESETN)                      ? 1'b1          :
                          write_resp_accept                     ? 1'b1          :
                          write_addr_accept                     ? 1'b0          :
                          /* default */                           S_AXI_AWREADY ;
   `raxi S_AXI_WREADY  <= (!S_AXI_ARESETN)                      ? 1'b1          :
                          write_resp_accept                     ? 1'b1          :
                          write_data_accept                     ? 1'b0          :
                          /* default */                           S_AXI_WREADY  ;
   `raxi S_AXI_BVALID  <= (!S_AXI_ARESETN)                      ? 1'b0          :
                          write_resp_accept                     ? 1'b0          :
                          (write_addr_held && write_data_held)  ? 1'b1          :
                          /* default */                           S_AXI_BVALID  ;
   `raxi S_AXI_BRESP   <= 2'b0;
   

   //
   // Read channels.  One read is handled at a time: ARREADY drops on the
   // address handshake and rises again when the data beat is taken.
   //
   `raxi S_AXI_ARREADY <= (!S_AXI_ARESETN)                ?   1               :
                          S_AXI_RVALID && S_AXI_RREADY    ?   1               :
                          S_AXI_ARREADY && S_AXI_ARVALID  ?   0               :
                          /* default */                       S_AXI_ARREADY   ;

   `w read_in_progress = !S_AXI_ARREADY;
                        
   // RVALID rises when the RAM signals its output for the pending read and
   // falls when the master takes the beat.
   `raxi S_AXI_RVALID  <= (!S_AXI_ARESETN)                      ? 0             : 
                          read_in_progress && read_enable_out   ? 1             :
                          S_AXI_RVALID && S_AXI_RREADY          ? 0             :
                          /* default */                           S_AXI_RVALID  ;
   
   `raxi S_AXI_RRESP   <= 2'b0;
   
   // Capture the selected word when read_enable_out is high and hold it otherwise.
   `raxi S_AXI_RDATA   <= (!S_AXI_ARESETN)   ?  0              : 
                          read_enable_out    ?  axi_read_data  :
                          /* default */         S_AXI_RDATA    ;

   `w    read_address_byte   = S_AXI_ARADDR;
   // One-clock pulse following the read-address handshake; drives the RAM's in_sync_i.
   `raxi read_enable_in     <= S_AXI_ARREADY && S_AXI_ARVALID && !read_in_progress;


   
     
endmodule
