2017-09-02 14:52:53 · fkgjdkblxckvbxbgb · 3656 views

I've recently been working on a translation-ring project that includes speech recognition; the recognized text is then sent off for translation.

For speech recognition we naturally went with iFlytek, but in this project the audio source is not the phone — it's a Bluetooth headset. It was my first time taking on such a project and I was a bit lost at first, so I searched around online.

This fellow's code was a great help.

Original post: http://blog.csdn.net/ec_boy_hl/article/details/45112493

Following his approach, I wrote a demo to test it. It reliably captures audio and performs speech recognition; I'm sharing it here.


The key code:

When starting recognition, route the audio source through Bluetooth:

mAudioManager.setBluetoothScoOn(true);
mAudioManager.startBluetoothSco();


After recognition is done, close it again (in my tests it also works if you don't):

mAudioManager.setBluetoothScoOn(false);
mAudioManager.stopBluetoothSco();



I won't paste the whole project; the code below is the official iFlytek demo plus the two snippets above, and that's all there is to it.

Mind the required permissions — without them nothing works:

    <uses-permission android:name="android.permission.RECORD_AUDIO" />
    <uses-permission android:name="android.permission.MODIFY_AUDIO_SETTINGS" />
    <uses-permission android:name="android.permission.BROADCAST_STICKY" />
    <uses-permission android:name="android.permission.BLUETOOTH" />
    <uses-permission android:name="android.permission.INTERNET" />
    <uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
    <uses-permission android:name="android.permission.ACCESS_WIFI_STATE" />
    <uses-permission android:name="android.permission.CHANGE_NETWORK_STATE" />
    <uses-permission android:name="android.permission.READ_PHONE_STATE" />
    <uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
    <uses-permission android:name="android.permission.READ_CONTACTS" />
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
    <uses-permission android:name="android.permission.WRITE_SETTINGS" />
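
One note beyond the original demo: on Android 6.0+ the dangerous permissions above (RECORD_AUDIO, storage, location, contacts) must also be requested at runtime; the manifest entries alone are not enough. A minimal sketch, assuming it lives inside the demo activity (the helper name and request code are ours, not part of the iFlytek demo):

    // Runtime check for the microphone permission on API 23+.
    private static final int REQ_RECORD_AUDIO = 100; // arbitrary request code

    private void checkAndRequestAudioPermission() {
        if (android.os.Build.VERSION.SDK_INT >= 23
                && checkSelfPermission(android.Manifest.permission.RECORD_AUDIO)
                        != android.content.pm.PackageManager.PERMISSION_GRANTED) {
            requestPermissions(new String[] { android.Manifest.permission.RECORD_AUDIO },
                    REQ_RECORD_AUDIO);
        }
    }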

The main code:

package com.iflytek.voicedemo;

import java.util.HashMap;
import java.util.LinkedHashMap;

import org.json.JSONException;
import org.json.JSONObject;

import android.annotation.SuppressLint;
import android.app.Activity;
import android.content.BroadcastReceiver;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.content.SharedPreferences;
import android.media.AudioManager;
import android.os.Bundle;
import android.os.Environment;
import android.text.TextUtils;
import android.util.Log;
import android.view.View;
import android.view.View.OnClickListener;
import android.view.Window;
import android.widget.EditText;
import android.widget.RadioGroup;
import android.widget.RadioGroup.OnCheckedChangeListener;
import android.widget.Toast;

import com.iflytek.cloud.ErrorCode;
import com.iflytek.cloud.InitListener;
import com.iflytek.cloud.LexiconListener;
import com.iflytek.cloud.RecognizerListener;
import com.iflytek.cloud.RecognizerResult;
import com.iflytek.cloud.SpeechConstant;
import com.iflytek.cloud.SpeechError;
import com.iflytek.cloud.SpeechRecognizer;
import com.iflytek.cloud.SpeechUtility;
import com.iflytek.cloud.ui.RecognizerDialog;
import com.iflytek.cloud.ui.RecognizerDialogListener;
import com.iflytek.cloud.util.ContactManager;
import com.iflytek.cloud.util.ContactManager.ContactListener;
import com.iflytek.speech.setting.IatSettings;
import com.iflytek.speech.util.ApkInstaller;
import com.iflytek.speech.util.FucUtil;
import com.iflytek.speech.util.JsonParser;
import com.iflytek.sunflower.FlowerCollector;

public class IatDemo extends Activity implements OnClickListener {
	private static String TAG = IatDemo.class.getSimpleName();
	// speech dictation object
	private SpeechRecognizer mIat;
	// dictation UI dialog
	private RecognizerDialog mIatDialog;
	// store dictation results (a LinkedHashMap keeps them in order)
	private HashMap<String, String> mIatResults = new LinkedHashMap<String, String>();

	private EditText mResultText;
	private Toast mToast;
	private SharedPreferences mSharedPreferences;
	// engine type
	private String mEngineType = SpeechConstant.TYPE_CLOUD;

	private boolean mTranslateEnable = false;

	private BroadcastReceiver receiver = new BroadcastReceiver() {

		@Override
		public void onReceive(Context arg0, Intent intent) {
			String action = intent.getAction();
			if (action.equals("android.intent.action.VOICE_COMMAND")) {
				Log.i("main", "====== voice command button pressed");
			}
		}
	};

	@SuppressLint("ShowToast")
	@Override
	public void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		requestWindowFeature(Window.FEATURE_NO_TITLE);
		setContentView(R.layout.iatdemo);
		initReceiver();
		initLayout();
		mIat = SpeechRecognizer.createRecognizer(IatDemo.this, mInitListener);
		mIatDialog = new RecognizerDialog(IatDemo.this, mInitListener);

		mSharedPreferences = getSharedPreferences(IatSettings.PREFER_NAME,
				Activity.MODE_PRIVATE);
		mToast = Toast.makeText(this, "", Toast.LENGTH_SHORT);
		mResultText = ((EditText) findViewById(R.id.iat_text));
	}

	private void initReceiver() {
		IntentFilter filter = new IntentFilter();
		filter.addAction("android.intent.action.VOICE_COMMAND");
		registerReceiver(receiver, filter);
	}

	/**
	 * Initialize the layout.
	 */
	AudioManager mAudioManager;

	private void initLayout() {
		mAudioManager = (AudioManager) getSystemService(Context.AUDIO_SERVICE);
		findViewById(R.id.iat_recognize).setOnClickListener(IatDemo.this);
		findViewById(R.id.iat_recognize_stream)
				.setOnClickListener(IatDemo.this);
		findViewById(R.id.iat_stop).setOnClickListener(IatDemo.this);
		findViewById(R.id.iat_cancel).setOnClickListener(IatDemo.this);
		findViewById(R.id.btn_test).setOnClickListener(IatDemo.this);
	}

	int ret = 0; // return code of SDK calls

	@Override
	public void onClick(View view) {
		if (null == mIat) {
			this.showTip("Failed to create the recognizer. Check that libmsc.so is in place and that createUtility was called for initialization");
			return;
		}

		switch (view.getId()) {
		case R.id.iat_recognize:
			mAudioManager.setBluetoothScoOn(true);
			mAudioManager.startBluetoothSco();

			// mobile analytics: log the start-of-dictation event
			mResultText.setText(null);// clear the display
			mIatResults.clear();
			// set recognition parameters
			setParam();
			boolean isShowDialog = mSharedPreferences.getBoolean(
					getString(R.string.pref_key_iat_show), true);
			if (isShowDialog) {
				// show the dictation dialog
				mIatDialog.setListener(mRecognizerDialogListener);
				mIatDialog.show();
				showTip(getString(R.string.text_begin));
			} else {
				// start without the dictation dialog
				ret = mIat.startListening(mRecognizerListener);
				if (ret != ErrorCode.SUCCESS) {
					showTip("Dictation failed, error code: " + ret);
				} else {
					showTip(getString(R.string.text_begin));
				}
			}
			break;

		case R.id.btn_test:
			mResultText.setText(null);// clear the display
			mIatResults.clear();
			// set recognition parameters
			setParam();
			// set the audio source to an external source
			// mIat.setParameter(SpeechConstant.AUDIO_SOURCE, "-1");
			// or point the recognizer directly at an audio file (the full path
			// of a file on the sdcard is required):
			mIat.setParameter(SpeechConstant.AUDIO_SOURCE, "-2");
			mIat.setParameter(SpeechConstant.ASR_SOURCE_PATH,
					"sdcard/record.pcm");
			ret = mIat.startListening(mRecognizerListener);
			if (ret != ErrorCode.SUCCESS) {
				showTip("Recognition failed, error code: " + ret);
			} else {
				byte[] audioData = FucUtil.readAudioFile(IatDemo.this,
						"iattest.wav");
				if (null != audioData) {
					// log only after the null check to avoid a NullPointerException
					Log.i("main", "==== audio data length ===" + audioData.length);
					showTip(getString(R.string.text_begin_recognizer));
					// Write the audio data in one call (or several). It must be 8 kHz
					// or 16 kHz (local recognition supports only 16 kHz; the cloud
					// supports both), 16-bit, mono wav or pcm.
					// When writing 8 kHz audio, first call
					// setParameter(SpeechConstant.SAMPLE_RATE, "8000") to set the rate.
					// Note: if the audio is long and a silent stretch exceeds VAD_EOS,
					// everything after the silence is not recognized.
					// To split the audio: FucUtil.splitBuffer(byte[] buffer, int length,
					// int spsize);
					mIat.writeAudio(audioData, 0, audioData.length);
					mIat.stopListening();
				} else {
					mIat.cancel();
					showTip("Failed to read the audio stream");
				}
			}
			break;
		// recognition from an audio stream
		case R.id.iat_recognize_stream:
			mResultText.setText(null);// clear the display
			mIatResults.clear();
			// set recognition parameters
			setParam();
			// set the audio source to an external source
			mIat.setParameter(SpeechConstant.AUDIO_SOURCE, "-1");
			// or point the recognizer directly at an audio file (the full path
			// of a file on the sdcard is required):
			// mIat.setParameter(SpeechConstant.AUDIO_SOURCE, "-2");
			// mIat.setParameter(SpeechConstant.ASR_SOURCE_PATH,
			// "sdcard/record.pcm");
			ret = mIat.startListening(mRecognizerListener);
			if (ret != ErrorCode.SUCCESS) {
				showTip("Recognition failed, error code: " + ret);
			} else {
				// byte[] audioData = FucUtil.readAudioFile(IatDemo.this,
				// "record.wav");
				byte[] audioData = FucUtil.readSdFile(IatDemo.this);
				if (null != audioData) {
					// log only after the null check to avoid a NullPointerException
					Log.i("main", "==== audio data length ===" + audioData.length);
					showTip(getString(R.string.text_begin_recognizer));
					// Same format requirements as above: 8 kHz/16 kHz, 16-bit, mono
					// wav or pcm, written in one call or several.
					mIat.setParameter(SpeechConstant.VAD_BOS, "4000");
					mIat.setParameter(SpeechConstant.VAD_EOS, "4000");
					mIat.setParameter(SpeechConstant.SAMPLE_RATE, "8000");
					mIat.writeAudio(audioData, 0, audioData.length);
					mIat.stopListening();
				} else {
					mIat.cancel();
					showTip("Failed to read the audio stream");
				}
			}
			break;
		// stop dictation
		case R.id.iat_stop:
			mIat.stopListening();
			showTip("Dictation stopped");
			break;
		// cancel dictation
		case R.id.iat_cancel:
			mAudioManager.setBluetoothScoOn(false);
			mAudioManager.stopBluetoothSco();
			mIat.cancel();
			showTip("Dictation cancelled");
			break;
		default:
			break;
		}
	}

	/**
	 * Initialization listener.
	 */
	private InitListener mInitListener = new InitListener() {

		@Override
		public void onInit(int code) {
			Log.d(TAG, "SpeechRecognizer init() code = " + code);
			if (code != ErrorCode.SUCCESS) {
				showTip("Initialization failed, error code: " + code);
			}
		}
	};

	/**
	 * Contact/lexicon upload listener.
	 */
	private LexiconListener mLexiconListener = new LexiconListener() {

		@Override
		public void onLexiconUpdated(String lexiconId, SpeechError error) {
			if (error != null) {
				showTip(error.toString());
			} else {
				showTip(getString(R.string.text_upload_success));
			}
		}
	};

	/**
	 * Dictation listener.
	 */
	private RecognizerListener mRecognizerListener = new RecognizerListener() {

		@Override
		public void onBeginOfSpeech() {
			// this callback fires once the SDK's internal recorder is ready and the user can start speaking
			showTip("Start speaking");
		}

		@Override
		public void onError(SpeechError error) {
			// Tips:
			// Error 10118 (no speech detected) may mean the recording permission is
			// blocked; prompt the user to grant it to the app. When using local
			// capabilities (the 语记 app), prompt the user to enable 语记's recording
			// permission.
			if (mTranslateEnable && error.getErrorCode() == 14002) {
				showTip(error.getPlainDescription(true) + "\nPlease confirm the translation feature is enabled");
			} else {
				showTip(error.getPlainDescription(true));
			}
		}

		@Override
		public void onEndOfSpeech() {
			// this callback fires when the trailing endpoint of speech is detected;
			// recognition is underway and no more audio input is accepted
			showTip("Done speaking");
		}

		@Override
		public void onResult(RecognizerResult results, boolean isLast) {
			Log.d(TAG, results.getResultString());
			if (mTranslateEnable) {
				printTransResult(results);
			} else {
				printResult(results);
			}

			if (isLast) {
				// TODO: handle the final result
			}
		}

		@Override
		public void onVolumeChanged(int volume, byte[] data) {
			showTip("Speaking, volume: " + volume);
			Log.d(TAG, "audio data returned: " + data.length);
		}

		@Override
		public void onEvent(int eventType, int arg1, int arg2, Bundle obj) {
			// The code below retrieves the cloud session id. If something goes wrong,
			// pass the session id to support staff so they can pull the session log
			// and locate the cause. With local capabilities the session id is null.
			// if (SpeechEvent.EVENT_SESSION_ID == eventType) {
			// String sid = obj.getString(SpeechEvent.KEY_EVENT_SESSION_ID);
			// Log.d(TAG, "session id =" + sid);
			// }
		}
	};

	private void printResult(RecognizerResult results) {
		String text = JsonParser.parseIatResult(results.getResultString());

		String sn = null;
		// read the sn field from the json result
		try {
			JSONObject resultJson = new JSONObject(results.getResultString());
			sn = resultJson.optString("sn");
		} catch (JSONException e) {
			e.printStackTrace();
		}

		mIatResults.put(sn, text);

		StringBuffer resultBuffer = new StringBuffer();
		for (String key : mIatResults.keySet()) {
			resultBuffer.append(mIatResults.get(key));
		}

		mResultText.setText(resultBuffer.toString());
		mResultText.setSelection(mResultText.length());
	}

	/**
	 * Dictation UI listener.
	 */
	private RecognizerDialogListener mRecognizerDialogListener = new RecognizerDialogListener() {
		public void onResult(RecognizerResult results, boolean isLast) {
			if (mTranslateEnable) {
				printTransResult(results);
			} else {
				printResult(results);
			}

		}

		/**
		 * Recognition error callback.
		 */
		public void onError(SpeechError error) {
			if (mTranslateEnable && error.getErrorCode() == 14002) {
				showTip(error.getPlainDescription(true) + "\nPlease confirm the translation feature is enabled");
			} else {
				showTip(error.getPlainDescription(true));
			}
		}

	};

	/**
	 * Contact query listener.
	 */
	private ContactListener mContactListener = new ContactListener() {

		@Override
		public void onContactQueryFinish(final String contactInfos,
				boolean changeFlag) {
			// Note: in a real app, uploads after the first should be gated on changeFlag,
			// otherwise you waste traffic. This callback fires whenever the contacts
			// change; call ContactManager.destroy() to release the object and unregister.
			// if(changeFlag) {
			// specify the engine type
			runOnUiThread(new Runnable() {
				public void run() {
					mResultText.setText(contactInfos);
				}
			});

			mIat.setParameter(SpeechConstant.ENGINE_TYPE,
					SpeechConstant.TYPE_CLOUD);
			mIat.setParameter(SpeechConstant.TEXT_ENCODING, "utf-8");
			ret = mIat.updateLexicon("contact", contactInfos, mLexiconListener);
			if (ret != ErrorCode.SUCCESS) {
				showTip("Failed to upload contacts: " + ret);
			}
		}
	};

	private void showTip(final String str) {
		mToast.setText(str);
		mToast.show();
	}

	/**
	 * Parameter setup.
	 */
	public void setParam() {
		// clear all parameters
		mIat.setParameter(SpeechConstant.PARAMS, null);

		// set the dictation engine
		mIat.setParameter(SpeechConstant.ENGINE_TYPE, mEngineType);
		// set the result format
		mIat.setParameter(SpeechConstant.RESULT_TYPE, "json");

		this.mTranslateEnable = mSharedPreferences.getBoolean(
				this.getString(R.string.pref_key_translate), false);
		if (mTranslateEnable) {
			Log.i(TAG, "translate enable");
			mIat.setParameter(SpeechConstant.ASR_SCH, "1");
			mIat.setParameter(SpeechConstant.ADD_CAP, "translate");
			mIat.setParameter(SpeechConstant.TRS_SRC, "its");
		}

		String lag = mSharedPreferences.getString("iat_language_preference",
				"mandarin");
		if (lag.equals("en_us")) {
			// set the language
			mIat.setParameter(SpeechConstant.LANGUAGE, "en_us");
			mIat.setParameter(SpeechConstant.ACCENT, null);

			if (mTranslateEnable) {
				mIat.setParameter(SpeechConstant.ORI_LANG, "en");
				mIat.setParameter(SpeechConstant.TRANS_LANG, "cn");
			}
		} else {
			// set the language
			mIat.setParameter(SpeechConstant.LANGUAGE, "zh_cn");
			// set the accent
			mIat.setParameter(SpeechConstant.ACCENT, lag);

			if (mTranslateEnable) {
				mIat.setParameter(SpeechConstant.ORI_LANG, "cn");
				mIat.setParameter(SpeechConstant.TRANS_LANG, "en");
			}
		}

		// leading voice endpoint (VAD_BOS): how long the user may stay silent before the input times out
		mIat.setParameter(SpeechConstant.VAD_BOS,
				mSharedPreferences.getString("iat_vadbos_preference", "4000"));

		// trailing voice endpoint (VAD_EOS): how long a pause counts as end of input, after which recording stops automatically
		mIat.setParameter(SpeechConstant.VAD_EOS,
				mSharedPreferences.getString("iat_vadeos_preference", "1000"));

		// punctuation: "0" returns results without punctuation, "1" with punctuation
		mIat.setParameter(SpeechConstant.ASR_PTT,
				mSharedPreferences.getString("iat_punc_preference", "1"));

		// audio save path; pcm and wav are supported. When saving to the sdcard,
		// note the WRITE_EXTERNAL_STORAGE permission.
		// Note: the AUDIO_FORMAT parameter needs an updated 语记 version to take effect.
		mIat.setParameter(SpeechConstant.AUDIO_FORMAT, "wav");
		mIat.setParameter(SpeechConstant.ASR_AUDIO_PATH,
				Environment.getExternalStorageDirectory() + "/msc/iat.wav");
	}

	private void printTransResult(RecognizerResult results) {
		String trans = JsonParser.parseTransResult(results.getResultString(),
				"dst");
		String oris = JsonParser.parseTransResult(results.getResultString(),
				"src");

		if (TextUtils.isEmpty(trans) || TextUtils.isEmpty(oris)) {
			showTip("Failed to parse the result. Please confirm the translation feature is enabled.");
		} else {
			mResultText.setText("Source language:\n" + oris + "\nTarget language:\n" + trans);
		}

	}

	@Override
	protected void onDestroy() {
		super.onDestroy();
		if (null != mIat) {
			mIat.cancel();
			mIat.destroy();
		}
		unregisterReceiver(receiver);
	}

}

2017-11-14 17:05:48 · u010483897 · 2096 views
Recently I've been developing an app that performs iFlytek speech recognition through a Bluetooth headset, then uses speech synthesis to read the recognition result back out through the headset. Most of what I found online says to open the Bluetooth link with these two lines:

       mAudioManager.setBluetoothScoOn(true);
       mAudioManager.startBluetoothSco();

I made the same change in my own code.

Strangely, speech recognition now works through the Bluetooth headset, but the synthesized result is still played through the phone's speaker. What do I change so that both recognition and synthesis playback go through the headset? Pointers from anyone who has seen this would be appreciated, thanks.
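
One likely direction comes from the SCO notes in the last article on this page: audio only leaves through the SCO link when it is written to the STREAM_VOICE_CALL stream while SCO is open. A hedged sketch of applying that to the synthesizer — mTts stands for the app's SpeechSynthesizer, and it is an assumption that this SDK version exposes a SpeechConstant.STREAM_TYPE parameter:

       // keep SCO open for playback, not just recognition
       mAudioManager.setBluetoothScoOn(true);
       mAudioManager.startBluetoothSco();
       // route TTS output onto the voice-call stream so it goes out over SCO
       mTts.setParameter(SpeechConstant.STREAM_TYPE,
               String.valueOf(AudioManager.STREAM_VOICE_CALL));
       mTts.startSpeaking(text, mTtsListener);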
2017-03-05 15:59:37 · VR_Utopia · 1760 views

HoloLens only supports speech recognition in English, while my project needs Chinese speech recognition. Since iFlytek is one of the better-known speech-recognition vendors in China, I tried grafting iFlytek's Windows SDK into Unity3D. The result was disappointing: the program runs in the Unity editor but fails to run on the HoloLens itself, probably because the iFlytek SDK does not support Windows 10; that will have to wait for an SDK update from iFlytek. Along the way, progress stalled for a while because the audio format Unity provides differs from the one iFlytek needs. After some effort I worked out the pattern, and I'm sharing it here.

iFlytek speech recognition works by uploading audio data to the iFlytek server. iFlytek requires that data to be PCM format, 16 kHz sample rate, 16-bit, mono, while the audio Unity captures from the microphone is an array of 32-bit floats in two channels, so the data must be converted before use. The pipeline is:

1. Capture microphone audio data in Unity

2. Choose the sampling period, sample count, and processing period

3. Convert the 32-bit float stereo array into a 16-bit mono audio array

1. Capturing the microphone audio stream in Unity

First, the feature this project implements is real-time transcription — not recording a clip with a button press and uploading it for recognition, but continuously pulling data from the microphone and uploading it. So I need to read the audio stream (AudioStream). The solution offered online is to grab the microphone buffer through the third-party plugin NAudio, but in testing, NAudio would not compile in Unity (strictly speaking, NAudio does not compile for UWP), so I dropped that plan. Later, looking through HoloToolkit's Input module, I found the MicStream script, which helps read the microphone buffer. Here is the code.

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in the project root for license information.


using System.Runtime.InteropServices;
using UnityEngine;
using System.Text;


namespace HoloToolkit.Unity.InputModule
{
    public class MicStream
    {
        // This class replaces Unity's Microphone object
        // This class is made for HoloLens mic stream selection, but should work well on all windows 10 devices
        // chooses from one of three possible microphone modes on HoloLens
        // There is an example of how to use this script in HoloToolkit\Input\Tests\Scripts\MicStreamDemo.cs


        // Streams: LOW_QUALITY_VOICE is optimized for speech analysis, HIGH_QUALITY_VOICE is higher quality voice and is probably preferred
        //          ROOM_CAPTURE tries to capture the sounds of the room more than the voice of the user
        // can only be set on initialization
        public enum StreamCategory { LOW_QUALITY_VOICE, HIGH_QUALITY_VOICE, ROOM_CAPTURE }


        public enum ErrorCodes { ALREADY_RUNNING = -10, NO_AUDIO_DEVICE, NO_INPUT_DEVICE, ALREADY_RECORDING, GRAPH_NOT_EXIST, CHANNEL_COUNT_MISMATCH, FILE_CREATION_PERMISSION_ERROR, NOT_ENOUGH_DATA, NEED_ENABLED_MIC_CAPABILITY };


        const int MAX_PATH = 260; // 260 is maximum path length in windows, to be returned when we MicStopRecording


        [UnmanagedFunctionPointer(CallingConvention.StdCall)] // If included in MicStartStream, this callback will be triggered when audio data is ready. This is not the preferred method for Game Engines and can probably be ignored.
        public delegate void LiveMicCallback();


        /// <summary>
        /// Called before calling MicStartStream or MicStartRecording to initialize the microphone
        /// </summary>
        /// <param name="category">One of the entries in the StreamCategory enumeration</param>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicInitializeDefault(int category);


        /// <summary>
        /// Called before calling MicStartStream or MicStartRecording to initialize the microphone
        /// </summary>
        /// <param name="category">One of the entries in the StreamCategory enumeration</param>
        /// <param name="samplerate">Desired number of samples per second</param>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicInitializeCustomRate(int category, int samplerate);


        /// <summary>
        /// Call this to start receiving data from a microphone. Then, each frame, call MicGetFrame.
        /// </summary>
        /// <param name="keepData">If true, all data will stay in the queue if the client code is running behind. This can lead to significant audio lag, so it is not appropriate for low-latency situations like real-time voice chat.</param>
        /// <param name="previewOnDevice">If true, the audio from the microphone will be played through your speakers.</param>
        /// <param name="micsignal">Optional (can be null): this callback will be called when data is ready for MicGetFrame</param>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicStartStream(bool keepData, bool previewOnDevice, LiveMicCallback micsignal);


        /// <summary>
        /// Call this to start receiving data from a microphone. Then, each frame, call MicGetFrame.
        /// </summary>
        /// <param name="keepData">If true, all data will stay in the queue if the client code is running behind. This can lead to significant audio lag, so it is not appropriate for low-latency situations like real-time voice chat.</param>
        /// <param name="previewOnDevice">If true, the audio from the microphone will be played through your speakers.</param>
        /// <returns>error code or 0</returns>
        public static int MicStartStream(bool keepData, bool previewOnDevice)
        {
            return MicStartStream(keepData, previewOnDevice, null);
        }


        /// <summary>
        /// Shuts down the connection to the microphone. Data will no longer be received from the microphone.
        /// </summary>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicStopStream();


        /// <summary>
        /// Begins recording microphone data to the specified file.
        /// </summary>
        /// <param name="filename">The file will be saved under this name. Specify only the wav file's name with extension, e.g. "myfile.wav", not a full path</param>
        /// <param name="previewOnDevice">If true, will play the mic stream through the speakers</param>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicStartRecording(string filename, bool previewOnDevice);


        /// <summary>
        /// Finishes writing the file recording started with MicStartRecording.
        /// </summary>
        /// <param name="sb">receives the full path to the recorded audio file</param>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern void MicStopRecording(StringBuilder sb);


        /// <summary>
        /// Finishes writing the file recording started with MicStartRecording.
        /// </summary>
        /// <returns>the full path to the recorded audio file</returns>
        public static string MicStopRecording()
        {
            StringBuilder builder = new StringBuilder(MAX_PATH);
            MicStopRecording(builder);
            return builder.ToString();
        }


        /// <summary>
        /// Cleans up data associated with microphone recording. Counterpart to MicInitialize*
        /// </summary>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicDestroy();


        /// <summary>
        /// Pauses streaming of microphone data to MicGetFrame (and/or the file specified with MicStartRecording)
        /// </summary>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicPause();


        /// <summary>
        /// Unpauses streaming of microphone data to MicGetFrame (and/or the file specified with MicStartRecording)
        /// </summary>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicResume();


        /// <summary>
        /// Sets the amplification factor for microphone samples returned by MicGetFrame (and/or the file specified with MicStartRecording)
        /// </summary>
        /// <param name="g">gain factor</param>
        /// <returns>error code or 0</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicSetGain(float g);


        /// <summary>
        /// Queries the default microphone audio frame sample size. Useful when doing default initializations with callbacks, to know how much data it wants to hand you.
        /// </summary>
        /// <returns>the number of samples in the default audio buffer</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicGetDefaultBufferSize();


        /// <summary>
        /// Queries the number of channels supported by the microphone. Useful when doing default initializations with callbacks, to know how much data it wants to hand you.
        /// </summary>
        /// <returns>the number of channels</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicGetDefaultNumChannels();


        /// <summary>
        /// Read from the microphone buffer. Usually called once per frame.
        /// </summary>
        /// <param name="buffer">the buffer into which to store the microphone audio samples</param>
        /// <param name="length">the length of the buffer</param>
        /// <param name="numchannels">the number of audio channels to store in the buffer</param>
        /// <returns>error code (or 0 if no error)</returns>
        [DllImport("MicStreamSelector", ExactSpelling = true)]
        public static extern int MicGetFrame(float[] buffer, int length, int numchannels);


        /// <summary>
        /// Prints useful error/warning messages based on error codes returned from the functions in this class
        /// </summary>
        /// <param name="returnCode">An error code returned by another function in this class</param>
        /// <returns>True if no error or warning message was printed, false if a message was printed</returns>
        public static bool CheckForErrorOnCall(int returnCode)
        {
            switch (returnCode)
            {
                case (int)ErrorCodes.ALREADY_RECORDING:
                    Debug.LogError("WARNING: Tried to start recording when you were already doing so. You need to stop your previous recording before you can start again.");
                    return false;
                case (int)ErrorCodes.ALREADY_RUNNING:
                    Debug.LogError("WARNING: Tried to initialize microphone more than once");
                    return false;
                case (int)ErrorCodes.GRAPH_NOT_EXIST:
                    Debug.LogError("ERROR: Tried to do microphone things without a properly initialized microphone. \n Do you have a mic plugged into a functional audio system and did you call MicInitialize() before anything else ??");
                    return false;
                case (int)ErrorCodes.NO_AUDIO_DEVICE:
                    Debug.LogError("ERROR: Tried to start microphone, but you don't appear to have a functional audio device. check your OS audio settings.");
                    return false;
                case (int)ErrorCodes.NO_INPUT_DEVICE:
                    Debug.LogError("ERROR: Tried to start microphone, but you don't have one plugged in, do you?");
                    return false;
                case (int)ErrorCodes.CHANNEL_COUNT_MISMATCH:
                    Debug.LogError("ERROR: Microphone had a channel count mismatch internally on device. Try setting different mono/stereo options in OS mic settings.");
                    return false;
                case (int)ErrorCodes.FILE_CREATION_PERMISSION_ERROR:
                    Debug.LogError("ERROR: Didn't have access to create the file in the Music library. Make sure permission to write to the Music library is granted.");
                    return false;
                case (int)ErrorCodes.NOT_ENOUGH_DATA:
                    // usually not an error, means the device hasn't produced enough data yet because it just started running
                    return false;
                case (int)ErrorCodes.NEED_ENABLED_MIC_CAPABILITY:
                    Debug.LogError("ERROR: Seems like you forgot to enable the microphone capabilities in your Unity permissions");
                    return false;
            }
            return true;
        }
    }
}

This script wraps the MicStreamSelector DLL written by Microsoft. I won't go through its methods in detail here; if you're interested, look it up in HoloToolkit. What we call here is int MicGetFrame(float[] buffer, int length, int numchannels), which fills a float buffer of the given length with microphone audio data; numchannels is the channel count of that data, usually 2, i.e. stereo. Since this method fetches audio data each frame, it is best called in FixedUpdate(). Another important method is int MicInitializeCustomRate(int category, int samplerate), which sets the sampling rate and therefore caps how much data a single frame can return. We set it to 16000 (why that value is explained shortly): at most 16,000 float samples can be obtained per second.


Through this audio stream we can obtain audio data from the HoloLens microphone. That is how to capture HoloLens microphone audio in Unity.


2. Choosing the sampling period and sample count

Here is the code:

    private void FixedUpdate()
    {
        if (IsRecord)
        {
            float[] buffer = new float[640];
            MicStream.CheckForErrorOnCall(MicStream.MicGetFrame(buffer, 640, 2));
            convertBuffer(getMonoData(buffer));
            test.Add(buffer);

            if (CanProcess)
            {
                ProcessData(resultsample);
            }
        }
    }
Why must the buffer array be sized at 640?

iFlytek requires PCM audio at a 16 kHz sample rate, 16-bit, mono, so the microphone sampling rate must be 16000 — hence MicInitializeCustomRate(int category, int samplerate) is called with samplerate = 16000. Since MicGetFrame(float[] buffer, int length, int numchannels) has to be read every frame at a fixed interval, FixedUpdate() is the natural place to call it. We then need to estimate the buffer size per FixedUpdate: by default Unity calls FixedUpdate() every 20 ms, and a 16 kHz rate means 16,000 samples per second, so 20 ms yields 320 samples. Because Unity delivers stereo data, the buffer must hold 320 × 2 = 640 entries, hence a buffer size of 640.


3. Converting the 32-bit float stereo array to a 16-bit mono audio array

Now the 32-bit float stereo data from Unity must be converted to 16-bit mono format, with each 16-bit sample represented by two 8-bit bytes. The conversion code follows:

    /// <summary>
    /// Convert stereo audio data to mono
    /// </summary>
    /// <param name="buffer">interleaved stereo samples; 640 floats expected</param>
    /// <returns>320 left-channel samples, or null if the buffer is too short</returns>
    private float[] getMonoData(float[] buffer) {
        if (buffer.Length < 640) {
            Debug.Log("Audio sample buffer is shorter than 640");
            return null;
        }
        
        float[] result = new float[320];

        for (int i = 0,j=0; i < 640; i += 2,j++)
        {
            result[j] = buffer[i];

        }

        return result;
    }

    /// <summary>
    /// Process the audio data and upload it to the iFlytek server
    /// </summary>
    /// <param name="buffer">mono float samples in [-1, 1]</param>
    private void ProcessData(float[] buffer) {
        int ret=-1;
        short two;
        byte[] newArray16Bit = new byte[buffer.Length * 2];
        short[] arrayshort = new short[buffer.Length];
        for (int i = 0, j = 0; i < buffer.Length; i++, j += 2)
        {

            two = (short)(buffer[i] * short.MaxValue);
            arrayshort[i] = two;
            newArray16Bit[j] = (byte)(two & 0xff);
            newArray16Bit[j + 1] = (byte)((two >> 8) & 0xff);

        }

        IntPtr bp = Marshal.AllocHGlobal(newArray16Bit.Length);
        Marshal.Copy(newArray16Bit, 0, bp, newArray16Bit.Length);
        uint bytelength = (uint)newArray16Bit.Length;



        if (this.Ep_status == EpStatus.ISR_EP_NULL)
        {

            ret = ASRDLL.QISRAudioWrite(sess_Appid, bp, bytelength, AudioStatus.ISR_AUDIO_SAMPLE_FIRST, ref this.Ep_status, ref this.Rec_status);
        }
        else if (this.Ep_status == EpStatus.ISR_EP_LOOKING_FOR_SPEECH)
        {
            ret = ASRDLL.QISRAudioWrite(sess_Appid, bp, bytelength, AudioStatus.ISR_AUDIO_SAMPLE_CONTINUE, ref this.Ep_status, ref this.Rec_status);
        }
        else if (this.Ep_status == EpStatus.ISR_EP_IN_SPEECH)
        {
            ret = ASRDLL.QISRAudioWrite(sess_Appid, bp, bytelength, AudioStatus.ISR_AUDIO_SAMPLE_CONTINUE, ref this.Ep_status, ref this.Rec_status);

        }

        CanProcess = false;
        Debug.Log("QISRAudioWrite return code: " + ret.ToString());
        Debug.Log("Endpoint status: " + Ep_status.ToString());
        text.text += Ep_status.ToString();
        Marshal.FreeHGlobal(bp);

    }

Online sources usually describe 32-bit float audio as the left and right 16-bit channels packed together, but Unity's float samples are simply amplitude values in the range [-1, 1], with the left and right channels interleaved, so extracting one channel just means reading every other entry. And since each float is an amplitude, converting it to 16-bit data means multiplying it by the 16-bit maximum (short.MaxValue), which yields the 16-bit audio sample.


2013-04-21 23:29:47 · zhandoushi1982 · 26627 views

Android speech recognition can lean on cloud services to recognize what the user says, including for voice control. Below we use the API Google provides to implement it: recognize the user's speech and print the candidates in a list.

First, build an activity like the following, and put a ListView control under the button (to display the recognized items).

The code:

package com.example.speechreg;

import android.app.Activity;
import android.content.Intent;
import android.content.pm.PackageManager;   // for querying installed packages
import android.content.pm.ResolveInfo;      // info returned for a matching IntentFilter
import android.os.Bundle;
import android.speech.RecognizerIntent;     // speech recognition intent constants
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.ArrayAdapter;         // adapter that backs a list with an array
import android.widget.Button;
import android.widget.ListView;
 
import java.util.ArrayList;
import java.util.List;
 
/**
 * Sample code that invokes the speech recognition intent API.
 */
public class MainActivity extends Activity implements OnClickListener {
    private static final int VOICE_RECOGNITION_REQUEST_CODE = 1234;   // request code for startActivityForResult
    private ListView mList;
 
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.voice_recognition);
        Button speakButton = (Button) findViewById(R.id.btn_speak);
        mList = (ListView) findViewById(R.id.listView1);    
 
        PackageManager pm = getPackageManager();
        List<ResolveInfo> activities = pm.queryIntentActivities(new Intent(
                RecognizerIntent.ACTION_RECOGNIZE_SPEECH), 0);  // ask the package manager whether a speech recognition service is installed
        if (activities.size() != 0) {
            speakButton.setOnClickListener(this);	// the recognizer activity exists
        } else {
            speakButton.setEnabled(false);
            speakButton.setText("Recognizer not present");    // otherwise relabel the button and disable it
        }
    }
 
    public void onClick(View v) {    // OnClickListener callback
        if (v.getId() == R.id.btn_speak) {
            startVoiceRecognitionActivity();
        }
    }
 
    private void startVoiceRecognitionActivity() {
        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        intent.putExtra(RecognizerIntent.EXTRA_PROMPT,
                "Speech recognition demo");    // extras configuring the recognition intent
        startActivityForResult(intent, VOICE_RECOGNITION_REQUEST_CODE);   // launch an activity that must return a result
    }
 
    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {  // overrides Activity's own implementation, which is why @Override is valid even though the method is neither abstract nor from an interface
        if (requestCode == VOICE_RECOGNITION_REQUEST_CODE
                && resultCode == RESULT_OK) {
            // Fill the list view with the strings the recognizer thought it
            // could have heard
            ArrayList<String> matches = data
                    .getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);  // extract the recognition results
            mList.setAdapter(new ArrayAdapter<String>(this,
                    android.R.layout.simple_list_item_1, matches));  // show the results in the list view
        }
 
        super.onActivityResult(requestCode, resultCode, data);
    }
}

When run, it automatically invokes the Google voice recognition service activity and returns that activity's result.

========================================= iFlytek speech recognition and TTS ==========================

I came across a good article online about Chinese speech recognition, voice control, and speech synthesis: http://blog.csdn.net/gao_chun/article/details/46377505



Reference: http://blog.csdn.net/comkingfly/article/details/7359987

Reference: http://software.intel.com/zh-cn/articles/developing-android-applications-with-voice-recognition-features/?utm_campaign=CSDN&utm_source=intel.csdn.net&utm_medium=Link&utm_content=intelandroid-yuyinshibie

2015-04-18 13:04:09 · Ec_boy_HL · 9411 views

This is my first blog post on CSDN; I'll use it to record how I solved a problem I wrestled with for two or three days.

Background: I'm responsible for Android app development at my company, and recently I had to build a voice-assistant app similar to 灵犀语音助手 (Lingxi) or 虫洞语音助手. Two problems around speech recognition with a Bluetooth headset proved quite troublesome: 1. capturing the Bluetooth headset's button event while connected, to launch recognition; 2. making sure that when recognition starts normally, the audio is captured through the Bluetooth headset. Testing showed that Lingxi has solved both, so my app had to as well. Searching the web turned up next to nothing, so I'm contributing my small findings here for reference; corrections are welcome.

On the first problem, monitoring the headset buttons: I searched everywhere, at home and abroad, and found no complete solution (someone proposed a workaround here: http://blog.csdn.net/kangear/article/details/40430673, but it felt unusual and didn't fit my app's scenario, so I didn't try it). Single and double presses of the call button (that button has many other functions; I'll just call it the call button) cannot be monitored, but a long press can be captured: by default, with a Bluetooth headset connected to an Android phone, long-pressing the call button for a few seconds makes the system send an Intent with action=android.intent.action.VOICE_COMMAND. That is what Lingxi uses to detect the long press, so I copied the approach:

       1. First, in AndroidManifest.xml, add the following to the activity meant to capture the headset long-press event (referred to as A below):

         <intent-filter android:priority="2147483647">
            <action android:name="android.intent.action.VOICE_COMMAND" />
            <category android:name="android.intent.category.DEFAULT" />
         </intent-filter>

With the headset connected, long-press the call button for a few seconds and release as soon as you hear the prompt tone (wear the headset while doing this or you won't hear it — hold too long and you power the headset off). A chooser dialog pops up listing every app whose activity declares the intent-filter above; you need to guide the user to pick your app and choose "Always" (mind A's launchMode; I recommend singleTask). Once confirmed, your app launches: if A doesn't exist yet, it is created; if A already exists, its onNewIntent(Intent intent) is called. So you can detect the headset long press simply by checking the action of the intent A receives.
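
A minimal sketch of that check inside A (the helper name is ours; the action string is the one above):

	@Override
	protected void onNewIntent(Intent intent) {
		super.onNewIntent(intent);
		handleVoiceCommand(intent);
	}

	// also call handleVoiceCommand(getIntent()) from onCreate, since the first
	// launch delivers the intent there rather than through onNewIntent
	private void handleVoiceCommand(Intent intent) {
		if (intent != null
				&& "android.intent.action.VOICE_COMMAND".equals(intent.getAction())) {
			// Bluetooth headset long press detected: kick off speech recognition here
		}
	}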

       2. On recognition through a Bluetooth headset: the recognition SDK this app uses is iFlytek's, and iFlytek gives a solution for this problem.

Before calling the recognition engine, open SCO. Key code:

       AudioManager mAudioManager = (AudioManager)getSystemService(Context.AUDIO_SERVICE);
       mAudioManager.setBluetoothScoOn(true);
       mAudioManager.startBluetoothSco();

After recognition has started and finished, close SCO:

       mAudioManager.setBluetoothScoOn(false);
       mAudioManager.stopBluetoothSco();

With this method, audio capture through the headset works.

You may wonder why. Briefly, a Bluetooth headset has two audio links: A2DP and SCO. Per the Android APIs, A2DP is a one-way, high-quality audio link, typically used to play stereo music; SCO is a two-way audio link that supports only 8 kHz or 16 kHz mono audio, suitable only for ordinary voice — use it for music and good luck. The key differences: A2DP is playback-only and on by default, while SCO can record as well as play and is off by default. Since we need to record, SCO has to be opened, so calling the code above before recognition lets you record through the Bluetooth headset; remember to close it when done.

Although the method above enables recording, testing surfaced a problem: the headset plays a prompt tone at both startBluetoothSco() and stopBluetoothSco(). If recognition has its own prompt tone, the extra Bluetooth tones are baffling and make for a poor experience. Yet when I tested Lingxi's Bluetooth feature, there was no tone at all. To replicate it fully, the tone had to go, and another web search again turned up almost nothing. Out of options, I browsed Android's Bluetooth APIs and found another way to open and close SCO: android.bluetooth.BluetoothHeadset's startVoiceRecognition(BluetoothDevice device) and stopVoiceRecognition(BluetoothDevice device). Testing confirmed that opening and closing SCO this way produces no prompt tone on the headset (as an aside: thanks a lot, iFlytek!). Key code below:

                // The following code obtains the BluetoothHeadset while a headset is already connected. To monitor headset connections, just receive the broadcast with action=BluetoothAdapter.ACTION_CONNECTION_STATE_CHANGED; I won't go into that here.

private BluetoothHeadset bluetoothHeadset;

BluetoothProfile.ServiceListener blueHeadsetListener = new BluetoothProfile.ServiceListener() {

	@Override
	public void onServiceDisconnected(int profile) {
		Log.i("blueHeadsetListener", "onServiceDisconnected:" + profile);
		if (profile == BluetoothProfile.HEADSET) {
			bluetoothHeadset = null;
		}
	}

	@Override
	public void onServiceConnected(int profile, BluetoothProfile proxy) {
		Log.i("blueHeadsetListener", "onServiceConnected:" + profile);
		if (profile == BluetoothProfile.HEADSET) {
			bluetoothHeadset = (BluetoothHeadset) proxy;
		}
	}
};

private void initBlueToothHeadset() {
	BluetoothAdapter adapter;
	if (android.os.Build.VERSION.SDK_INT < android.os.Build.VERSION_CODES.JELLY_BEAN_MR2) {
		// before Android 4.3, BluetoothAdapter.getDefaultAdapter() is enough
		adapter = BluetoothAdapter.getDefaultAdapter();
	} else {
		BluetoothManager bm = (BluetoothManager) getSystemService(Context.BLUETOOTH_SERVICE);
		adapter = bm.getAdapter();
	}
	adapter.getProfileProxy(context, blueHeadsetListener, BluetoothProfile.HEADSET);
}
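
Once the proxy is available, opening and closing SCO without the prompt tone might look like the following sketch (getConnectedDevices() is the standard BluetoothProfile call; the helper names are ours and error handling is omitted):

private boolean startScoSilently() {
	if (bluetoothHeadset == null) return false;
	for (BluetoothDevice device : bluetoothHeadset.getConnectedDevices()) {
		// returns false if the headset refuses or SCO cannot be established
		return bluetoothHeadset.startVoiceRecognition(device);
	}
	return false; // no connected headset
}

private void stopScoSilently() {
	if (bluetoothHeadset == null) return;
	for (BluetoothDevice device : bluetoothHeadset.getConnectedDevices()) {
		bluetoothHeadset.stopVoiceRecognition(device);
	}
}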


        
       To output audio through the headset's SCO link, SCO must be open and the streamType must be set to AudioManager.STREAM_VOICE_CALL.
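
For example, playing a prompt through the headset with MediaPlayer might look like this sketch (promptUri is a placeholder for whatever audio you play; SCO is assumed to be open already):

MediaPlayer player = new MediaPlayer();
player.setAudioStreamType(AudioManager.STREAM_VOICE_CALL); // route output into the SCO link
player.setDataSource(context, promptUri);
player.prepare();
player.start();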

       Finally, two things to note:
     1. Testing shows that while SCO is open, the call-button long press from point 1 will not send android.intent.action.VOICE_COMMAND; the long press only produces that event while SCO is closed.
     2. Opening and closing SCO both take real time — opening especially, on the order of several hundred milliseconds up to a few seconds. Be sure to start speech recognition only once SCO is fully open. You can track SCO's state by receiving the broadcast with action=AudioManager.ACTION_SCO_AUDIO_STATE_UPDATED (see the sketch below), or by checking BluetoothHeadset's isAudioConnected(BluetoothDevice device). In my tests the BluetoothHeadset methods seem to open SCO faster (a personal impression; I don't know the real reason — an expert's explanation would be welcome).
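
A sketch of that broadcast listener (where you register it is up to you; the action and extras are the standard AudioManager constants):

private final BroadcastReceiver scoReceiver = new BroadcastReceiver() {
	@Override
	public void onReceive(Context context, Intent intent) {
		int state = intent.getIntExtra(AudioManager.EXTRA_SCO_AUDIO_STATE,
				AudioManager.SCO_AUDIO_STATE_ERROR);
		if (state == AudioManager.SCO_AUDIO_STATE_CONNECTED) {
			// SCO is fully up: now it is safe to start listening
			// e.g. mIat.startListening(mRecognizerListener);
		}
	}
};

// register before calling startBluetoothSco():
// registerReceiver(scoReceiver,
//         new IntentFilter(AudioManager.ACTION_SCO_AUDIO_STATE_UPDATED));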
