Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add realtime kws example for stm32f4xx #8

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions Deployment/Examples/stm32f4_realtime_test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
DNN KWS on STM32F4
==================

Use the STM32F407's 12-bit ADC to capture audio from a microphone, then run DNN keyword spotting (KWS) to detect speech commands.

### Hardware
+ [Arch Max (STM32F407)](https://www.seeedstudio.com/Arch-Max-v1.1-p-2632.html)
+ [Grove Sound Sensor (must add a bias for the microphone)](https://github.com/xiongyihui/ML-KWS-for-MCU/issues/1)

```
              TIM3 (16K)
                   \
Microphone -> ADC (PC0) -> DMA
```

![](https://statics3.seeedstudio.com/images/product/102080004%200.jpg)


### Build & Run

In this example, KWS inference runs on the audio data captured by the ADC.
First, create a new project and install any Python dependencies you are prompted for (this happens the first time a project is created after installing mbed-cli).
```bash
mbed new kws_simple_test --mbedlib
```
Fetch the required mbed libraries for compilation.
```bash
cd kws_simple_test
mbed deploy
```
Compile the code for the mbed board (for example Arch Max STM32F407).
```bash
mbed compile -m ARCH_MAX -t GCC_ARM --source . --source ../Source --source ../Examples/stm32f4_realtime_test --source ../CMSIS_5/CMSIS/NN/Include --source ../CMSIS_5/CMSIS/NN/Source --source ../CMSIS_5/CMSIS/DSP/Include --source ../CMSIS_5/CMSIS/DSP/Source --source ../CMSIS_5/CMSIS/Core/Include --profile ../release_O3.json
```

Copy the binary (.bin) to the board (make sure the board is detected and mounted). Then open a serial terminal (e.g. PuTTY or minicom) at 576000 baud to see the classification output.
```bash
cp ./BUILD/ARCH_MAX/GCC_ARM/kws_simple_test.bin /media/$USER/MBED
sudo minicom -D /dev/ttyACM0 -b 576000
```
257 changes: 257 additions & 0 deletions Deployment/Examples/stm32f4_realtime_test/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
/*
* Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* Description: Example code for running keyword spotting on Cortex-M boards
*/

// How to use STM32F4xx ADC - https://visualgdb.com/tutorials/arm/stm32/adc/
// Signal chain: Timer 3 (16 kHz sample rate) triggers ADC1 conversions; DMA moves the results to memory

#include "kws.h"
#include "wav_data.h"

// Number of samples handed to the KWS pipeline per capture callback.
// FRAME_LEN is not defined in this file (presumably provided via kws.h — verify).
#define AUDIO_BLOCK_SIZE (1 * FRAME_LEN)
// The DMA target holds two blocks so one half can be copied out while the
// other half is being filled.
#define ADC_BUFFER_LENGTH (AUDIO_BLOCK_SIZE * 2)

// Raw ADC samples written by DMA, one 32-bit word per sample (ping-pong halves).
uint32_t audio_input_buffer[ADC_BUFFER_LENGTH]; // 2 for ping-pong buffer
// Signed audio samples consumed by the KWS object. Initialised from WAVE_DATA
// (presumably a recorded clip from wav_data.h — verify); its first
// AUDIO_BLOCK_SIZE entries are overwritten by the ADC callbacks.
int16_t audio_buffer[16000] = WAVE_DATA;
// Alternative kept by the original author: a buffer sized for a single block.
// int16_t audio_buffer[AUDIO_BLOCK_SIZE];

// Incremented by the ADC callbacks each time a block is copied into
// audio_buffer; polled by main().
volatile uint32_t sample_count = 0;
// Working memory passed to the KWS constructor.
// SCRATCH_BUFFER_SIZE is not defined in this file (presumably from kws.h — verify).
q7_t scratch_buffer[SCRATCH_BUFFER_SIZE];
// Class labels, indexed by the class index returned from the classifier.
// Each row is 8 bytes: the longest labels ("Silence", "Unknown") are
// 7 characters plus the terminating NUL.
char output_class[12][8] = {"Silence", "Unknown","yes","no","up","down","left","right","on","off","stop","go"};

// Serial port on the USBTX/USBRX pins; main() sets its baud rate.
Serial pc(USBTX, USBRX);
// Timer object available for timing measurements.
Timer T;

// HAL handles for the ADC and its DMA stream; filled in by ConfigureADC()
// and ConfigureDMA() and used by the interrupt handlers.
ADC_HandleTypeDef g_AdcHandle;
DMA_HandleTypeDef g_DmaHandle;


void ConfigureTIM(void)
{
__TIM3_CLK_ENABLE();

TIM_HandleTypeDef s_TimerInstance = {
.Instance = TIM3
};

s_TimerInstance.Init.Prescaler = 0;
s_TimerInstance.Init.CounterMode = TIM_COUNTERMODE_UP;

// 16 KHz, from 84 MHz TIM2CLK (TIM2CLK = HCLK/2)
s_TimerInstance.Init.Period = (84000000 / 16000) - 1;
s_TimerInstance.Init.ClockDivision = TIM_CLOCKDIVISION_DIV1;
s_TimerInstance.Init.RepetitionCounter = 0;

TIM_MasterConfigTypeDef sMasterConfig;
// sMasterConfig.MasterOutputTrigger = TIM_TRGO_RESET;
sMasterConfig.MasterOutputTrigger = TIM_TRGO_UPDATE;
sMasterConfig.MasterSlaveMode = TIM_MASTERSLAVEMODE_DISABLE;
HAL_TIMEx_MasterConfigSynchronization(&s_TimerInstance, &sMasterConfig);

HAL_TIM_Base_Init(&s_TimerInstance);
HAL_TIM_Base_Start(&s_TimerInstance);
}

/*
 * Configure PC0 as an analog input and set up ADC1 for single, externally
 * triggered 12-bit conversions on channel 10 with DMA requests enabled.
 *
 * Conversions are started by the rising edge of TIM3's trigger output
 * (ADC_EXTERNALTRIGCONV_T3_TRGO), not by software.
 *
 * Any HAL failure stops at a breakpoint instruction (bkpt 255) so it is
 * noticed under a debugger instead of being silently ignored.
 */
void ConfigureADC()
{
    // Zero-initialise so fields not set below (e.g. GPIO Speed/Alternate)
    // are not passed to the HAL with indeterminate values.
    GPIO_InitTypeDef gpioInit = {0};
    ADC_ChannelConfTypeDef adcChannel = {0};

    __GPIOC_CLK_ENABLE();
    __ADC1_CLK_ENABLE();

    // PC0: analog mode, no pull resistor.
    gpioInit.Pin = GPIO_PIN_0;
    gpioInit.Mode = GPIO_MODE_ANALOG;
    gpioInit.Pull = GPIO_NOPULL;
    HAL_GPIO_Init(GPIOC, &gpioInit);

    HAL_NVIC_SetPriority(ADC_IRQn, 0, 0);
    HAL_NVIC_EnableIRQ(ADC_IRQn);

    g_AdcHandle.Instance = ADC1;

    g_AdcHandle.Init.ClockPrescaler = ADC_CLOCKPRESCALER_PCLK_DIV2;
    g_AdcHandle.Init.Resolution = ADC_RESOLUTION_12B;
    // One channel, one conversion per trigger: no scan/continuous/discontinuous mode.
    g_AdcHandle.Init.ScanConvMode = DISABLE;
    g_AdcHandle.Init.ContinuousConvMode = DISABLE;
    g_AdcHandle.Init.DiscontinuousConvMode = DISABLE;
    g_AdcHandle.Init.NbrOfDiscConversion = 0;
    // Each rising edge of TIM3 TRGO starts one conversion.
    g_AdcHandle.Init.ExternalTrigConvEdge = ADC_EXTERNALTRIGCONVEDGE_RISING;
    g_AdcHandle.Init.ExternalTrigConv = ADC_EXTERNALTRIGCONV_T3_TRGO;
    g_AdcHandle.Init.DataAlign = ADC_DATAALIGN_RIGHT;
    g_AdcHandle.Init.NbrOfConversion = 1;
    // Keep issuing DMA requests after each conversion (needed for circular DMA).
    g_AdcHandle.Init.DMAContinuousRequests = ENABLE;
    // NOTE(review): DISABLE is used as the EOC selection value; confirm it
    // matches the intended ADC_EOC_* setting for the HAL version in use.
    g_AdcHandle.Init.EOCSelection = DISABLE;

    // Previously unchecked: a failed init would leave the ADC unconfigured
    // while the channel setup below still ran.
    if (HAL_ADC_Init(&g_AdcHandle) != HAL_OK)
    {
        asm("bkpt 255");
    }

    adcChannel.Channel = ADC_CHANNEL_10;
    adcChannel.Rank = 1;
    adcChannel.SamplingTime = ADC_SAMPLETIME_480CYCLES;
    adcChannel.Offset = 0;

    if (HAL_ADC_ConfigChannel(&g_AdcHandle, &adcChannel) != HAL_OK)
    {
        asm("bkpt 255");
    }
}

/*
 * Configure DMA2 stream 4 / channel 0 as a circular peripheral-to-memory
 * transfer, attach it to the ADC handle and enable the stream interrupt.
 *
 * The transfer itself is started elsewhere (HAL_ADC_Start_DMA in main()).
 * HAL return values are not inspected.
 */
void ConfigureDMA()
{
    __DMA2_CLK_ENABLE();

    // Which stream/channel carries the request.
    g_DmaHandle.Instance = DMA2_Stream4;
    g_DmaHandle.Init.Channel = DMA_CHANNEL_0;

    // Transfer shape: fixed peripheral address, incrementing memory address,
    // wrapping around at the end of the buffer.
    g_DmaHandle.Init.Direction = DMA_PERIPH_TO_MEMORY;
    g_DmaHandle.Init.Mode = DMA_CIRCULAR;
    g_DmaHandle.Init.PeriphInc = DMA_PINC_DISABLE;
    g_DmaHandle.Init.MemInc = DMA_MINC_ENABLE;

    // 32-bit words on both sides (the destination buffer is uint32_t).
    g_DmaHandle.Init.PeriphDataAlignment = DMA_PDATAALIGN_WORD;
    g_DmaHandle.Init.MemDataAlignment = DMA_MDATAALIGN_WORD;

    g_DmaHandle.Init.Priority = DMA_PRIORITY_HIGH;

    // Direct mode: FIFO disabled, single transfers (threshold value is still
    // filled in, as in the original configuration).
    g_DmaHandle.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
    g_DmaHandle.Init.FIFOThreshold = DMA_FIFO_THRESHOLD_HALFFULL;
    g_DmaHandle.Init.MemBurst = DMA_MBURST_SINGLE;
    g_DmaHandle.Init.PeriphBurst = DMA_PBURST_SINGLE;

    HAL_DMA_Init(&g_DmaHandle);

    // Let the ADC driver find its DMA stream (and vice versa).
    __HAL_LINKDMA(&g_AdcHandle, DMA_Handle, g_DmaHandle);

    HAL_NVIC_SetPriority(DMA2_Stream4_IRQn, 0, 0);
    HAL_NVIC_EnableIRQ(DMA2_Stream4_IRQn);
}

extern "C" {

void HAL_ADC_ConvCpltCallback(ADC_HandleTypeDef *AdcHandle)
{
// copy the new recording data
for (int i = 0; i < AUDIO_BLOCK_SIZE; i++)
{
audio_buffer[i] = audio_input_buffer[AUDIO_BLOCK_SIZE + i] - 1024 - 335;
}
sample_count++;
}

void HAL_ADC_ConvHalfCpltCallback(ADC_HandleTypeDef *AdcHandle)
{
// copy the new recording data
for (int i = 0; i < AUDIO_BLOCK_SIZE; i++)
{
audio_buffer[i] = audio_input_buffer[i] - 1024 - 335;
}
sample_count++;
}

void DMA2_Stream4_IRQHandler()
{
HAL_DMA_IRQHandler(&g_DmaHandle);
}

void ADC_IRQHandler()
{
HAL_ADC_IRQHandler(&g_AdcHandle);
}
}

/*
 * Application entry point.
 *
 * Starts the timer-triggered ADC/DMA capture chain, then loops forever:
 * whenever the ADC callbacks report a new block in audio_buffer, extract
 * features for one frame, classify, smooth the predictions over a short
 * window and print the top class with its score as a percentage.
 *
 * Never returns.
 */
int main()
{
    // Number of consecutive predictions averaged to smooth the output.
    // One prediction is produced per captured block of AUDIO_BLOCK_SIZE samples.
    const int averaging_window_len = 3;

    KWS kws(audio_buffer, scratch_buffer);

    pc.baud(576000);
    printf("---- KWS ----\r\n");

    // Initial feature extraction on the buffer contents before live capture starts.
    kws.extract_features();

    // Bring up the capture chain. The timer is started last so that no
    // conversion is triggered before the ADC and DMA are armed.
    ConfigureADC();
    ConfigureDMA();
    HAL_ADC_Start_DMA(&g_AdcHandle, audio_input_buffer, ADC_BUFFER_LENGTH);

    ConfigureTIM();

    // Value of sample_count at the last block that was processed.
    uint32_t detect_count = 0;

    while (1) {
        // Read the ISR-updated counter once per iteration.
        const uint32_t captured = sample_count;

        if (captured == detect_count) {
            // TODO: sleep
            continue;
        }

        // audio_buffer only ever holds the most recent block, so if several
        // blocks arrived while we were busy there is nothing to catch up on:
        // jump straight to the latest count instead of re-running inference
        // on the same samples. Comparing with != also stays correct when the
        // counter wraps.
        detect_count = captured;

        kws.extract_features(1); //extract mfcc features

        kws.classify(); //classify using dnn

        kws.average_predictions(averaging_window_len);

        int max_ind = kws.get_top_detection(kws.averaged_output);

        // Every block is reported, including class 0 ("Silence").
        // NOTE(review): the original author left a disabled gate here that
        // printed only when max_ind != 0 and the averaged score was at least
        // 80% of full scale (128); re-enable something similar to reduce
        // output noise.
        printf("Detected %s (%d%%)\r\n",output_class[max_ind],((int)kws.averaged_output[max_ind]*100/128));
    }
}
1 change: 1 addition & 0 deletions Deployment/Examples/stm32f4_realtime_test/wav_data.h

Large diffs are not rendered by default.