`timescale 1ps / 1ps

// mem_read_D
//
// Read-address generator. While valid_D is high, each rising clock edge
// registers one read address (rd_addr_D) and a one-hot select (activate_D)
// computed from the current counters, then advances the counters:
//
//   col      : 0 .. M3-1, restarts at 0 after the last column
//   col_mod  : col modulo N2 (restarts together with col)
//   row_mod  : 0 .. N1-1, advanced each time col restarts
//   row_div  : base address, increased by M3 each time row_mod wraps
//
// The address is  row_div + col + (N2-1) - 2*col_mod,  i.e. within every
// N2-wide group of columns the positions are visited in mirrored order
// (for N2 = 4: offsets 3, 2, 1, 0, then 7, 6, 5, 4, ...).
//
// While valid_D is low both outputs are driven to 0 and the counters hold
// their values, so the sequence resumes where it stopped.
//
// Parameters
//   N1            width of activate_D and modulus of the row_mod counter
//   N2            width of a mirrored column group
//   MATRIXSIZE_W  bit width of M3, M1dN1 and the internal counters
//   ADDR_W        bit width of rd_addr_D (the computed address is truncated
//                 to this width)
//
// NOTE(review): M1dN1 is never read. row_div only returns to 0 on rst, so a
//   second pass without an intervening reset continues from the previous
//   base address. Confirm whether a wrap after M1dN1 row groups is intended.
// NOTE(review): if M3 is not a multiple of N2, the mirrored address of the
//   last (partial) column group exceeds row_div + M3 - 1. Presumably M3 is
//   always a multiple of N2 -- confirm against the caller.
module mem_read_D #(
    parameter integer N1           = 4,
    parameter integer N2           = 4,
    parameter integer MATRIXSIZE_W = 16,
    parameter integer ADDR_W       = 12
) (
    input  logic                    clk,
    input  logic                    rst,       // synchronous, active high
    input  logic [MATRIXSIZE_W-1:0] M3,        // number of columns per row
    input  logic [MATRIXSIZE_W-1:0] M1dN1,     // unused in this module (see note above)
    input  logic                    valid_D,   // advance / emit enable
    output logic [ADDR_W-1:0]       rd_addr_D,
    output logic [N1-1:0]           activate_D
);

    logic [MATRIXSIZE_W-1:0] row_mod;  // row % N1 (selects the activate_D bit)
    logic [MATRIXSIZE_W-1:0] row_div;  // (row / N1) * M3: base address of the current row group
    logic [MATRIXSIZE_W-1:0] col;      // column index within the current row
    logic [MATRIXSIZE_W-1:0] col_mod;  // col % N2
    logic [MATRIXSIZE_W-1:0] col_div;  // only ever reset; kept so the internal signal set is unchanged

    always_ff @(posedge clk) begin
        if (rst) begin
            rd_addr_D  <= '0;
            activate_D <= '0;
            col        <= '0;
            row_mod    <= '0;
            row_div    <= '0;
            col_mod    <= '0;
            col_div    <= '0;
        end else if (valid_D) begin
            // Outputs are computed from the counter values *before* this
            // edge's update (non-blocking assignments).
            rd_addr_D  <= row_div + col + (N2 - 1) - (col_mod << 1);
            activate_D <= N1'(1) << row_mod;

            // "col + 1 < M3" instead of "col < M3 - 1": identical for every
            // M3 >= 1, but M3 == 0 no longer wraps the bound to all-ones and
            // leaves the column counter running forever.
            if (col + 1 < M3) begin
                col     <= col + 1;
                col_mod <= (col_mod < N2 - 1) ? col_mod + 1 : '0;
            end else begin
                // Last column of the row: restart columns, move to next row.
                col     <= '0;
                col_mod <= '0;
                if (row_mod == N1 - 1) begin
                    row_mod <= '0;
                    row_div <= row_div + M3;  // next row group starts M3 addresses later
                end else begin
                    row_mod <= row_mod + 1;
                end
            end
        end else begin
            // Idle: outputs cleared, counters hold.
            rd_addr_D  <= '0;
            activate_D <= '0;
        end
    end

endmodule