语音唤醒是各智能音箱都在使用的,是当前人机语音交互的开始。之前使用的 snowboy ,似乎已“牺牲”,刚使用PicoVoice。比较可惜的是它还不支持中文,不过我想只使用它的唤醒,那也足够了。
Porcupine以块(帧)的形式接收音频。 .frame_length 属性提供每个帧的大小。Porcupine接受16 kHz音频与16位样本。对于每一帧,Porcupine返回一个数字: -1 表示未检测到关键字;非负数(从 0 开始)表示检测到的关键字在关键字列表中的索引。
pip3 install pvporcupine
注册Picovoice控制台 https://console.picovoice.ai/ 获取到AccessKey.
Github github.com/Picovoice/porcupine
它内置了一些关键字模型
import pvporcupine
# Print every keyword name exposed by pvporcupine.KEYWORDS, one per line.
for builtin_name in pvporcupine.KEYWORDS:
    print(builtin_name)
看关键词检测的实例:
import pvporcupine
from pvrecorder import PvRecorder
# Placeholders: replace with the AccessKey obtained from the Picovoice Console
# and with the built-in wake words you want to listen for.
access_key = "${ACCESS_KEY}"
keywords = ["picovoice", "computer"]

porcupine = pvporcupine.create(access_key=access_key, keywords=keywords)
recorder = None
try:
    # The recorder must deliver frames of exactly porcupine.frame_length samples.
    # NOTE(review): device_index=-1 presumably selects the default input device -
    # confirm against the PvRecorder documentation.
    recorder = PvRecorder(device_index=-1, frame_length=porcupine.frame_length)
    recorder.start()
    while True:
        # process() returns -1 when nothing was detected, otherwise the index
        # of the detected entry in `keywords`.
        keyword_index = porcupine.process(recorder.read())
        if keyword_index >= 0:
            print(f"Detected {keywords[keyword_index]}")
except KeyboardInterrupt:
    # Ctrl-C is the normal way to leave the loop; it may arrive before the
    # recorder exists, hence the None check.
    if recorder is not None:
        recorder.stop()
finally:
    # Release the native resources on every exit path.
    porcupine.delete()
    if recorder is not None:
        recorder.delete()
看起来还有golang版本:
github.com/Picovoice/porcupine/demo/go/micdemo
go run porcupine_mic_demo.go -access_key "${ACCESS_KEY}" -keywords "computer,picovoice" 测试,使用了内置的两个唤醒词(命令行中必须使用英文半角引号,${ACCESS_KEY} 替换为自己的 AccessKey)。
或者使用参数 -keyword_paths "/home/ease/tools/hi-easy.ppn" 调用自己的唤醒词,多个自定义唤醒词使用逗号分隔(从下面的代码看,只要指定了 -keyword_paths, -keywords 就会被忽略,所以内置关键词和自制关键词不能一起用)
或者使用此参数来调整敏感度:-sensitivities 0.3,0.6
以下代码修改过
package main
import (
"flag"
"fmt"
"log"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
porcupine "github.com/Picovoice/porcupine/binding/go/v3"
pvrecorder "github.com/Picovoice/pvrecorder/binding/go"
)
// main parses the command-line flags, configures and initializes Porcupine,
// then listens on the microphone until the process receives an interrupt.
//
// All validation failures terminate the program before any native resource is
// created. Once Porcupine is initialized, errors are returned instead of
// calling log.Fatal so that the engine and the recorder are always released.
func main() {
	accessKeyArg := flag.String("access_key", "", "AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)")
	keywordsArg := flag.String("keywords", "", fmt.Sprintf("Comma-separated list of built-in keywords. Available options are: %+q", porcupine.BuiltInKeywords))
	keywordPathsArg := flag.String("keyword_paths", "", "Comma-separated list of paths to keyword model files. "+
		"If not set it will be populated from -keywords argument")
	libraryPathArg := flag.String("library_path", "", "Path to Porcupine dynamic library file")
	modelPathArg := flag.String("model_path", "", "Path to Porcupine model file")
	sensitivitiesArg := flag.String("sensitivities", "", "Comma-separated list of sensitivity values for detecting keywords. "+
		"Each value should be a number within [0, 1]. A higher "+
		"sensitivity results in fewer misses at the cost of increasing the false alarm rate. "+
		"If not set 0.5 will be used.")
	flag.Parse()

	if *accessKeyArg == "" {
		log.Fatalf("AccessKey is required.")
	}
	p := porcupine.Porcupine{AccessKey: *accessKeyArg}

	// Optional overrides for the native library and the model file.
	if *libraryPathArg != "" {
		p.LibraryPath = resolveFile("library", *libraryPathArg)
	}
	if *modelPathArg != "" {
		p.ModelPath = resolveFile("model", *modelPathArg)
	}

	// Custom keyword files take precedence; built-in keywords are only
	// consulted when no -keyword_paths were given.
	if keywordPaths := splitList(*keywordPathsArg); len(keywordPaths) > 0 {
		for _, k := range keywordPaths {
			p.KeywordPaths = append(p.KeywordPaths, resolveFile("keyword", k))
		}
	} else {
		keywords := splitList(*keywordsArg)
		if len(keywords) == 0 {
			log.Fatal("No built-in keywords or keyword model files were provided.")
		}
		for _, k := range keywords {
			builtInKeyword := porcupine.BuiltInKeyword(k)
			if !builtInKeyword.IsValid() {
				log.Fatalf("'%s' is not a valid built-in keyword. Available options are: %+q", k, porcupine.BuiltInKeywords)
			}
			p.BuiltInKeywords = append(p.BuiltInKeywords, builtInKeyword)
		}
	}

	// Sensitivities: either one value per keyword from the flag, or 0.5 for
	// every keyword file.
	sensitivities := splitList(*sensitivitiesArg)
	if len(sensitivities) == 0 {
		// NOTE(review): with built-in keywords nothing is appended here (as in
		// the original code); presumably Init applies its own default - confirm.
		for range p.KeywordPaths {
			p.Sensitivities = append(p.Sensitivities, 0.5)
		}
	} else {
		keywordCount := len(p.KeywordPaths) + len(p.BuiltInKeywords)
		if len(sensitivities) != keywordCount {
			log.Fatalf("Got %d sensitivity values for %d keywords; provide exactly one per keyword.", len(sensitivities), keywordCount)
		}
		for _, sensitivityStr := range sensitivities {
			sensitivityFloat, err := strconv.ParseFloat(sensitivityStr, 32)
			if err != nil || sensitivityFloat < 0 || sensitivityFloat > 1 {
				log.Fatalf("Sensitivity value of '%s' is invalid. Must be a float32 between [0, 1].", sensitivityStr)
			}
			p.Sensitivities = append(p.Sensitivities, float32(sensitivityFloat))
		}
	}

	if err := p.Init(); err != nil {
		log.Fatal(err)
	}

	// From here on native resources exist: report errors, clean up, and only
	// then exit with a non-zero status.
	exitCode := 0
	if err := listen(&p); err != nil {
		log.Printf("Error: %s.", err)
		exitCode = 1
	}
	if err := p.Delete(); err != nil {
		log.Printf("Failed to release resources: %s", err)
		exitCode = 1
	}
	if exitCode != 0 {
		os.Exit(exitCode)
	}
}

// splitList splits a comma-separated flag value into its items, trimming
// surrounding whitespace and dropping empty items. An empty value yields nil.
func splitList(value string) []string {
	var items []string
	for _, item := range strings.Split(value, ",") {
		if item = strings.TrimSpace(item); item != "" {
			items = append(items, item)
		}
	}
	return items
}

// resolveFile returns the absolute form of path and terminates the program
// when the path cannot be resolved or the file cannot be accessed. kind names
// the file in the error message ("library", "model" or "keyword").
func resolveFile(kind, path string) string {
	absPath, err := filepath.Abs(path)
	if err != nil {
		log.Fatalf("Could not resolve %s path %q: %v", kind, path, err)
	}
	if _, err := os.Stat(absPath); err != nil {
		if os.IsNotExist(err) {
			log.Fatalf("Could not find %s file at %s", kind, absPath)
		}
		log.Fatalf("Could not access %s file at %s: %v", kind, absPath, err)
	}
	return absPath
}

// listen records audio frames of porcupine.FrameLength samples and feeds them
// to the initialized engine p, printing the index of every detected keyword.
// It returns nil after an interrupt signal and a non-nil error when recording
// or processing fails. The recorder is stopped and deleted on every return.
func listen(p *porcupine.Porcupine) error {
	recorder := pvrecorder.NewPvRecorder(porcupine.FrameLength)
	recorder.DeviceIndex = -1
	if err := recorder.Init(); err != nil {
		return fmt.Errorf("initializing recorder: %w", err)
	}
	defer recorder.Delete()

	if err := recorder.Start(); err != nil {
		return fmt.Errorf("starting recorder: %w", err)
	}
	// Deferred after Delete, so it runs first: stop recording, then release.
	defer func() {
		if err := recorder.Stop(); err != nil {
			log.Printf("Failed to stop recorder: %s", err)
		}
	}()

	log.Printf("Listening...")

	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, os.Interrupt)
	defer signal.Stop(signalCh)

	for {
		// Non-blocking check so that a pending interrupt ends the loop
		// between two frames.
		select {
		case <-signalCh:
			log.Println("Stopping...")
			return nil
		default:
		}

		pcm, err := recorder.Read()
		if err != nil {
			return fmt.Errorf("reading audio frame: %w", err)
		}
		keywordIndex, err := p.Process(pcm)
		if err != nil {
			return fmt.Errorf("processing audio frame: %w", err)
		}
		if keywordIndex >= 0 {
			fmt.Printf("Keyword %d detected\n", keywordIndex)
		}
	}
}
因为在Pi0上使用,它居然也有Pi0上使用的c版本,需要编译:
gcc -std=c99 -O3 -o demo/respeaker-rpi0/porcupine_demo_mic \
-I include/ demo/respeaker-rpi0/porcupine_demo_mic.c \
-ldl -lasound
应先安装依赖库: sudo apt-get install libasound2-dev
./demo/respeaker-rpi0/porcupine_demo_mic \
${ACCESS_KEY} \
lib/raspberry-pi/arm11/libpv_porcupine.so \
lib/common/porcupine_params.pv \
0.65 \
plughw:CARD=seeed2micvoicec,DEV=0 \
resources/keyword_files/raspberry-pi/alexa_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/computer_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/hey\ google_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/hey\ siri_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/jarvis_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/picovoice_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/porcupine_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/bumblebee_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/terminator_raspberry-pi.ppn
根据你的唤醒词,将会改变主板上的RGB灯颜色。 看起来代码也不是很多,完全可以修改为自己的其它功能。
#include <dlfcn.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <alsa/asoundlib.h>
#include <asm/ioctl.h>
#include <linux/spi/spidev.h>
#include <signal.h>
#include <sys/ioctl.h>
#include "pv_porcupine.h"
/* LED colors as {red, green, blue} byte triples. */
static const uint8_t OFF_RGB[3] = {0, 0, 0};
static const uint8_t BLUE_RGB[3] = {0, 0, 255};
static const uint8_t GREEN_RGB[3] = {0, 255, 0};
static const uint8_t ORANGE_RGB[3] = {255, 128, 0};
static const uint8_t PINK_RGB[3] = {255, 51, 153};
static const uint8_t PURPLE_RGB[3] = {128, 0, 128};
static const uint8_t RED_RGB[3] = {255, 0, 0};
static const uint8_t WHITE_RGB[3] = {255, 255, 255};
static const uint8_t YELLOW_RGB[3] = {255, 255, 51};

/*
 * Set by the SIGINT handler to ask the capture loop to stop.
 * volatile sig_atomic_t is the only object type the C standard guarantees can
 * be written safely from an asynchronous signal handler.
 */
static volatile sig_atomic_t is_interrupted = 0;

/*
 * LED SPI documentation:
 *   https://github.com/torvalds/linux/blob/master/include/uapi/linux/spi/spi.h
 *   https://github.com/torvalds/linux/blob/master/include/uapi/linux/spi/spidev.h
 *   https://cdn-shop.adafruit.com/datasheets/APA102.pdf
 */

/* SPI settings applied to the LED device. */
static const uint8_t spi_mode = 0;
static const uint8_t spi_BPW = 8; /* bits per word */
static const uint32_t spi_speed = 6000000; /* passed as the maximum speed in Hz */
static const uint16_t spi_delay = 0; /* per-transfer delay in microseconds */

/* File descriptor of the opened SPI device, -1 while it is not open. */
static int spidev_fd = -1;
/*
 * Open /dev/spidev0.0 for reading and writing, store the descriptor in
 * spidev_fd and apply the mode, bits-per-word and maximum speed settings.
 * Any failure is reported on stderr together with the errno text and ends the
 * process with exit status 1.
 */
static void setup_spi() {
    /* One entry per setting, applied in this order. */
    const struct {
        unsigned long request;
        const void *value;
        const char *failure_format;
    } settings[] = {
        {SPI_IOC_WR_MODE, &spi_mode, "failed to change SPI mode '%s'.\n"},
        {SPI_IOC_WR_BITS_PER_WORD, &spi_BPW, "failed to change SPI BPW '%s'.\n"},
        {SPI_IOC_WR_MAX_SPEED_HZ, &spi_speed, "failed to change SPI speed '%s'.\n"},
    };
    const size_t setting_count = sizeof(settings) / sizeof(settings[0]);

    spidev_fd = open("/dev/spidev0.0", O_RDWR);
    if (spidev_fd < 0) {
        fprintf(stderr, "unable to open SPI device '%s'.\n", strerror(errno));
        exit(1);
    }

    for (size_t index = 0; index < setting_count; index++) {
        if (ioctl(spidev_fd, settings[index].request, settings[index].value) < 0) {
            fprintf(stderr, settings[index].failure_format, strerror(errno));
            exit(1);
        }
    }
}
/*
 * Send `len` bytes from `data` to the SPI device opened by setup_spi().
 *
 * The transfer is write-only: rx_buf stays 0, so the caller's buffer is not
 * overwritten with whatever the bus reads back. A NULL buffer or a
 * non-positive length is ignored. An ioctl failure is reported on stderr and
 * ends the process with exit status 1.
 */
static void spi_write_data(unsigned char *data, int len) {
    if (data == NULL || len <= 0) {
        return;
    }

    struct spi_ioc_transfer transfer;
    memset(&transfer, 0, sizeof(transfer)); /* also leaves rx_buf at 0: nothing is received */
    transfer.tx_buf = (unsigned long) data;
    transfer.len = len;
    transfer.delay_usecs = spi_delay;
    transfer.speed_hz = spi_speed;
    transfer.bits_per_word = spi_BPW;

    if (ioctl(spidev_fd, SPI_IOC_MESSAGE(1), &transfer) < 0) {
        fprintf(stderr, "failed to write to SPI '%s'.\n", strerror(errno));
        exit(1);
    }
}
/*
 * Show one color on the LEDs.
 *
 * Writes, in order: four single zero bytes, twelve 4-byte LED frames and four
 * more single zero bytes. Each LED frame is {0xE0 | brightness, blue, green,
 * red}, i.e. rgb[] is sent in reverse order.
 */
static void set_color(const uint8_t rgb[3]) {
    static const uint32_t BRIGHTNESS = 1;
    int32_t remaining;

    /* Leading zero bytes, one write per byte. */
    remaining = 4;
    while (remaining-- > 0) {
        uint8_t padding = 0x00;
        spi_write_data(&padding, 1);
    }

    /* The frame is rebuilt for every write because spi_write_data() takes a
       non-const buffer. */
    remaining = 12;
    while (remaining-- > 0) {
        uint8_t frame[4] = {
            (uint8_t) (0xE0 | (0x1F & BRIGHTNESS)), /* top three bits set, low five bits carry brightness */
            rgb[2],
            rgb[1],
            rgb[0],
        };
        spi_write_data(frame, 4);
    }

    /* Trailing zero bytes, one write per byte. */
    remaining = 4;
    while (remaining-- > 0) {
        uint8_t padding = 0x00;
        spi_write_data(&padding, 1);
    }
}
/* Signal handler: records that an interrupt was requested by setting
   is_interrupted. The signal number itself is not used. */
void interrupt_handler(int signal_number) {
    is_interrupted = true;
    (void) signal_number;
}
int main(int argc, char *argv[]) {
if (argc != 15) {
fprintf(stderr,
"usage : %s access_key library_path model_path sensitivity input_audio_device alexa_keyword_path "
"computer_keyword_path hey_google_keyword_path hey_siri_keyword_path jarvis_keyword_path "
"picovoice_keyword_path porcupine_keyword_path bumblebee_keyword_path terminator_keyword_path\n",
argv[0]);
exit(1);
}
signal(SIGINT, interrupt_handler);
const char *access_key = argv[1];
const char *library_path = argv[2];
const char *model_path = argv[3];
const float sensitivity = (float) atof(argv[4]);
const char *input_audio_device = argv[5];
const char **keyword_paths = (const char **) &argv[6];
const int32_t num_keywords = 9;
void *porcupine_library = dlopen(library_path, RTLD_NOW);
if (!porcupine_library) {
fprintf(stderr, "failed to open library.\n");
exit(1);
}
char *error = NULL;
const char *(*pv_status_to_string_func)(pv_status_t) = dlsym(porcupine_library, "pv_status_to_string");
if ((error = dlerror()) != NULL) {
fprintf(stderr, "failed to load 'pv_status_to_string' with '%s'.\n", error);
exit(1);
}
int32_t (*pv_sample_rate_func)() = dlsym(porcupine_library, "pv_sample_rate");
if ((error = dlerror()) != NULL) {
fprintf(stderr, "failed to load 'pv_sample_rate' with '%s'.\n", error);
exit(1);
}
pv_status_t (*pv_porcupine_init_func)(const char *, const char *, int32_t, const char *const *, const float *, pv_porcupine_t **) =
dlsym(porcupine_library, "pv_porcupine_init");
if ((error = dlerror()) != NULL) {
fprintf(stderr, "failed to load 'pv_porcupine_init' with '%s'.\n", error);
exit(1);
}
void (*pv_porcupine_delete_func)(pv_porcupine_t *) = dlsym(porcupine_library, "pv_porcupine_delete");
if ((error = dlerror()) != NULL) {
fprintf(stderr, "failed to load 'pv_porcupine_delete' with '%s'.\n", error);
exit(1);
}
pv_status_t (*pv_porcupine_process_func)(pv_porcupine_t *, const int16_t *, int32_t *) = dlsym(porcupine_library, "pv_porcupine_process");
if ((error = dlerror()) != NULL) {
fprintf(stderr, "failed to load 'pv_porcupine_process' with '%s'.\n", error);
exit(1);
}
int32_t (*pv_porcupine_frame_length_func)() = dlsym(porcupine_library, "pv_porcupine_frame_length");
if ((error = dlerror()) != NULL) {
fprintf(stderr, "failed to load 'pv_porcupine_frame_length' with '%s'.\n", error);
exit(1);
}
pv_porcupine_t *porcupine = NULL;
float sensitivities[num_keywords];
for (int32_t i = 0; i < num_keywords; i++) {
sensitivities[i] = sensitivity;
}
pv_status_t status = pv_porcupine_init_func(access_key, model_path, num_keywords, keyword_paths, sensitivities, &porcupine);
if (status != PV_STATUS_SUCCESS) {
fprintf(stderr, "'pv_porcupine_init' failed with '%s'\n", pv_status_to_string_func(status));
exit(1);
}
snd_pcm_t *alsa_handle = NULL;
int error_code = snd_pcm_open(&alsa_handle, input_audio_device, SND_PCM_STREAM_CAPTURE, 0);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_open' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
snd_pcm_hw_params_t *hardware_params = NULL;
error_code = snd_pcm_hw_params_malloc(&hardware_params);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_hw_params_malloc' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
error_code = snd_pcm_hw_params_any(alsa_handle, hardware_params);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_hw_params_any' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
error_code = snd_pcm_hw_params_set_access(alsa_handle, hardware_params, SND_PCM_ACCESS_RW_INTERLEAVED);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_hw_params_set_access' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
error_code = snd_pcm_hw_params_set_format(alsa_handle, hardware_params, SND_PCM_FORMAT_S16_LE);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_hw_params_set_format' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
error_code = snd_pcm_hw_params_set_rate(alsa_handle, hardware_params, pv_sample_rate_func(), 0);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_hw_params_set_rate' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
error_code = snd_pcm_hw_params_set_channels(alsa_handle, hardware_params, 1);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_hw_params_set_channels' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
error_code = snd_pcm_hw_params(alsa_handle, hardware_params);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_hw_params' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
snd_pcm_hw_params_free(hardware_params);
error_code = snd_pcm_prepare(alsa_handle);
if (error_code != 0) {
fprintf(stderr, "'snd_pcm_prepare' failed with '%s'\n", snd_strerror(error_code));
exit(1);
}
const int32_t frame_length = pv_porcupine_frame_length_func();
int16_t *pcm = malloc(frame_length * sizeof(int16_t));
if (!pcm) {
fprintf(stderr, "failed to allocate memory for audio buffer\n");
exit(1);
}
setup_spi();
fprintf(stdout, "[Listening]\n");
while (!is_interrupted) {
const int count = snd_pcm_readi(alsa_handle, pcm, frame_length);
if (count < 0) {
fprintf(stderr, "'snd_pcm_readi' failed with '%s'\n", snd_strerror(count));
exit(1);
} else if (count != frame_length) {
fprintf(stderr, "read %d frames instead of %d\n", count, frame_length);
exit(1);
}
int32_t keyword_index = -1;
status = pv_porcupine_process_func(porcupine, pcm, &keyword_index);
if (status != PV_STATUS_SUCCESS) {
fprintf(stderr, "'pv_porcupine_process' failed with '%s'\n", pv_status_to_string_func(status));
exit(1);
}
if (keyword_index != -1) {
static const char *KEYWORDS[] = {
"Alexa",
"Computer",
"Hey Google",
"Hey Siri",
"Jarvis",
"Picovoice",
"Porcupine",
"Bumblebee",
"Terminator",
};
fprintf(stdout, "detected '%s'\n", KEYWORDS[keyword_index]);
static const char *COLORS[] = {"yellow", "white", "red", "purple", "pink", "green", "blue", "orange", "off"};
switch (keyword_index) {
case 0:
set_color(YELLOW_RGB);
break;
case 1:
set_color(WHITE_RGB);
break;
case 2:
set_color(RED_RGB);
break;
case 3:
set_color(PURPLE_RGB);
break;
case 4:
set_color(PINK_RGB);
break;
case 5:
set_color(GREEN_RGB);
break;
case 6:
set_color(BLUE_RGB);
break;
case 7:
set_color(ORANGE_RGB);
break;
case 8:
set_color(OFF_RGB);
break;
}
}
}
free(pcm);
snd_pcm_close(alsa_handle);
pv_porcupine_delete_func(porcupine);
dlclose(porcupine_library);
close(spidev_fd);
return 0;
}
通过这个示例,发现资源目录下面其实有中文唤醒词:
resources/keyword_files_zh/raspberry-pi 这是pi使用的:
你好_raspberry-pi.ppn 咖啡_raspberry-pi.ppn 水饺_raspberry-pi.ppn 豪猪_raspberry-pi.ppn
必须要配合中文的模型文件 lib/common/porcupine_params_zh.pv