// Copyright 2018-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef _dspm_mult_H_
#define _dspm_mult_H_

#include "dsp_err.h"
#include "dspm_mult_platform.h"

#ifdef __cplusplus
extern "C"
{
#endif

/**@{*/
/**
 * @brief   Matrix multiplication
 *
 * Matrix multiplication for two floating point matrices: C[m][k] = A[m][n] * B[n][k]
 * The extension (_ansi) use ANSI C and could be compiled and run on any platform.
 * The extension (_ae32) is optimized for ESP32 chip.
 *
 * @param[in] A  input matrix A[m][n]
 * @param[in] B  input matrix B[n][k]
 * @param C  result matrix C[m][k]
 * @param[in] m  matrix dimension
 * @param[in] n  matrix dimension
 * @param[in] k  matrix dimension
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dspm_mult_f32_ansi(const float *A, const float *B, float *C, int m, int n, int k);
esp_err_t dspm_mult_f32_ae32(const float *A, const float *B, float *C, int m, int n, int k);
/**@}*/


/**
 * @brief   Matrix multiplication A[3x3]xB[3x1]
 *
 * Matrix multiplication for two floating point matrices 3x3 and 3x1: C[1][3] = A[3][3] * B[3][1]
 * The implementation is optimized for ESP32 chip.
 *
 * @param[in] A  input matrix A[3][3]
 * @param[in] B  input matrix/vector B[3][1]
 * @param C  result matrix/vector C[3][3]
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dspm_mult_3x3x1_f32_ae32(const float *A, const float *B, float *C);

/**
 * @brief   Matrix multiplication A[3x3]xB[3x3]
 *
 * Matrix multiplication for two square 3x3 floating point matrices: C[3][3] = A[3][3] * B[3][3]
 * The implementation is optimized for ESP32 chip.
 *
 * @param[in] A  input matrix A[3][3]
 * @param[in] B  input matrix B[3][3]
 * @param C  result matrix C[3][3]
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dspm_mult_3x3x3_f32_ae32(const float *A, const float *B, float *C);

/**
 * @brief   Matrix multiplication A[4x4]xB[4x1]
 *
 * Matrix multiplication for two floating point matrices 4x4 and 4x1: C[1][4] = A[4][4] * B[4][1]
 * The implementation is optimized for ESP32 chip.
 *
 * @param[in] A  input matrix A[4][4]
 * @param[in] B  input matrix/vector B[4][1]
 * @param C  result matrix/vector C[4][4]
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */

esp_err_t dspm_mult_4x4x1_f32_ae32(const float *A, const float *B, float *C);

/**
 * @brief   Matrix multiplication A[4x4]xB[4x4]
 *
 * Matrix multiplication for two square 3x3 floating point matrices: C[4][4] = A[4][4] * B[4][4]
 * The implementation is optimized for ESP32 chip.
 *
 * @param[in] A  input matrix A[4][4]
 * @param[in] B  input matrix B[4][4]
 * @param C  result matrix C[4][4]
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dspm_mult_4x4x4_f32_ae32(const float *A, const float *B, float *C);

/**@{*/
/**
 * @brief   Matrix multiplication 16 bit signeg int
 *
 * Matrix multiplication for two signed 16 bit fixed point matrices: C[m][k] = (A[m][n] * B[n][k]) >> (15- shift)
 * The extension (_ansi) use ANSI C and could be compiled and run on any platform.
 * The extension (_ae32) is optimized for ESP32 chip.
 *
 * @param[in] A  input matrix A[m][n]
 * @param[in] B  input matrix B[n][k]
 * @param C  result matrix C[m][k]
 * @param[in] m  matrix dimension
 * @param[in] n  matrix dimension
 * @param[in] k  matrix dimension
 * @param[in] shift every result will be shifted and stored as 16 bit signed value.
 * @return
 *      - ESP_OK on success
 *      - One of the error codes from DSP library
 */
esp_err_t dspm_mult_s16_ansi(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift);
esp_err_t dspm_mult_s16_ae32(const int16_t *A, const int16_t *B, int16_t *C, int m, int n, int k, int shift);
/**@}*/

#ifdef __cplusplus
}
#endif

#if CONFIG_DSP_OPTIMIZED

#if (dspm_mult_s16_ae32_enabled == 1)
#define dspm_mult_s16 dspm_mult_s16_ae32
#else
#define dspm_mult_s16 dspm_mult_s16_ansi
#endif

#if (dspm_mult_f32_ae32_enabled == 1)
#define dspm_mult_f32 dspm_mult_f32_ae32
#else
#define dspm_mult_f32 dspm_mult_f32_ansi
#endif

#if (dspm_mult_3x3x1_f32_ae32_enabled == 1)
#define dspm_mult_3x3x1_f32 dspm_mult_3x3x1_f32_ae32
#else
#define dspm_mult_3x3x1_f32(A,B,C) dspm_mult_f32_ansi(A,B,C, 3, 3, 1)
#endif
#if (dspm_mult_3x3x3_f32_ae32_enabled == 1)
#define dspm_mult_3x3x3_f32(A,B,C) dspm_mult_f32_ansi(A,B,C, 3, 3, 3)
#else
#define dsps_sub_f32 dsps_sub_f32_ansi
#endif
#if (dspm_mult_4x4x1_f32_ae32_enabled == 1)
#define dspm_mult_4x4x1_f32(A,B,C) dspm_mult_f32_ansi(A,B,C, 4, 4, 1)
#else
#define dsps_sub_f32 dsps_sub_f32_ansi
#endif
#if (dspm_mult_4x4x4_f32_ae32_enabled == 1)
#define dspm_mult_4x4x4_f32 dspm_mult_4x4x4_f32_ae32
#else
#define dspm_mult_4x4x4_f32(A,B,C) dspm_mult_f32_ansi(A,B,C, 4, 4, 4)
#endif

#else
#define dspm_mult_s16 dspm_mult_s16_ansi
#define dspm_mult_f32 dspm_mult_f32_ansi
#define dspm_mult_3x3x1_f32(A,B,C) dspm_mult_f32_ansi(A,B,C, 3, 3, 1)
#define dsps_sub_f32 dsps_sub_f32_ansi
#define dsps_sub_f32 dsps_sub_f32_ansi
#define dspm_mult_4x4x4_f32(A,B,C) dspm_mult_f32_ansi(A,B,C, 4, 4, 4)
#endif // CONFIG_DSP_OPTIMIZED


#endif // _dspm_mult_H_