2017-08-15 12:57:29 qq_35376521 阅读数 1905
  • 基于模板的文字识别结果结构化处理技术

    OCR(文字识别)技术是目前常用的一种AI能力。但一般OCR的识别结果是一种按行输出的半结构化输出。本课程从百度自定义模板文字识别展开,从理论到案例,详细介绍OCR结构化的相关技术,并理清OCR和结构化之间的关系和适用场景。

    1200 人正在学习 去看看 CSDN讲师

android应用百度语音识别、语音合成和语音唤醒


  • 首先申请百度语音识别:http://yuyin.baidu.com/
  • 下载SDK

  • 有两种方式实现语音识别:API和SDK集成

  • 唤醒功能默认唤醒词是百度一下或者小度你好,可以到官网去自定义唤醒词
  • 语音合成下载的SDK里有详细的文档说明,按照里面的做就行了
  • 记得把SDK里面demo的assets和jniLibs复制到项目中,具体看github

  • 直接上代码吧

MainActivity.java

package com.example.ruanyulin.asr_tts;

import android.annotation.TargetApi;
import android.app.SearchManager;
import android.content.ActivityNotFoundException;
import android.content.ComponentName;
import android.content.Context;
import android.content.Intent;
import android.content.pm.PackageInfo;
import android.content.pm.PackageManager;
import android.content.pm.ResolveInfo;
import android.net.Uri;
import android.os.Build;
import android.os.Environment;
import android.os.Handler;
import android.provider.Contacts;
import android.provider.ContactsContract;
import android.provider.MediaStore;
import android.support.design.widget.FloatingActionButton;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.telecom.TelecomManager;
import android.util.AndroidRuntimeException;
import android.util.Log;
import android.view.View;
import android.widget.TextView;
import android.widget.Toast;

import com.baidu.speech.EventListener;
import com.baidu.speech.EventManager;
import com.baidu.speech.EventManagerFactory;
import com.baidu.tts.auth.AuthInfo;
import com.baidu.tts.client.SpeechError;
import com.baidu.tts.client.SpeechSynthesizer;
import com.baidu.tts.client.SpeechSynthesizerListener;
import com.baidu.tts.client.TtsMode;
//import com.example.administrator.baiduvoicetest.R;
import com.example.ruanyulin.asr_tts.R;

import org.json.JSONException;
import org.json.JSONObject;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MainActivity extends AppCompatActivity {

    // Text view that shows the usage hint on the main screen.
    private TextView textResult;
    // Wake-word event manager; created in onStart and stopped in onPause.
    private EventManager eventManager;
    private String mSampleDirPath; // local directory used for the TTS model file paths
    // Last recognized command text; written in onActivityResult and read by analy().
    private String app;
    private SpeechSynthesizer speechSynthesizer;
    private FloatingActionButton floatingActionButton;
    private FloatingActionButton fab_setting;

    // Package names of the third-party/system apps that voice commands can launch.
    private String weixinp = "com.tencent.mm";
    private String qqp = "com.tencent.mobileqq";
    private String zhihup = "com.zhihu.android";
    private String baidumapp = "com.baidu.BaiduMap";
    private String tiebap = "com.baidu.tieba";
    private String zhibo = "air.tv.douyu.android";
    private String train = "com.MobileTicket";
    private String transfor = "com.youdao.dict";
    private String taobao = "com.taobao.taobao";
    private String weibo = "com.sina.weibo";
    private String jingdong = "com.jingdong.app.mall";
    private String tianmao = "com.tmall.wireless";
    private String calendar = "com.android.calendar";
    private String clock = "com.android.alarmclock";
    private String calcu = "com.android.calculator2";


    // File names of the offline TTS resources. Only the text model and the female
    // speech model are referenced by initTTS(); the others are not used in this class.
    private static final String TTS_TEXT_MODEL_FILE = "bd_etts_text.dat";
    private static final String TTS_SPEECH_MODEL_FILE = "bd_etts_speech_female.dat";
    private static final String TTS_SPEECH_MODEL1_FILE = "bd_etts_speech_male.dat";
    private static final String TTS_LICENSE_FILE = "temp_license";
    private static final String TTS_ENGLISH_SPEECH_MODEL1 = "bd_etts_speech_female_en.dat";
    private static final String TTS_ENGLISH_SPEECH_MODEL2 = "bd_etts_speech_male_en.dat";
    private static final String TTS_ENGLISH_TEXT_MODEL_FILE = "bd_etts_text_en.dat";
    private static final String TAG = "MainActivity";

    /**
     * Inflates the main layout, binds the views, installs the button listeners,
     * prepares the local resource directory and the speech synthesizer, and
     * finally plays the spoken greeting.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // View lookups.
        fab_setting = (FloatingActionButton) findViewById(R.id.fab_setting);
        floatingActionButton = (FloatingActionButton) findViewById(R.id.fab);
        textResult = (TextView) findViewById(R.id.texx);

        // Usage hint displayed on the main screen.
        final String usageHint = "说出指令可进行相关操作 \n"
                + "如 “QQ” 或者 “打开QQ” 则会打开手机QQ\n"
                + "“安装 xx” 则会搜索相关应用\n"
                + "请点击按钮或说出唤醒词启动语音助手\n"
                + "唤醒词为 “合工你好”";
        textResult.setText(usageHint);

        // Microphone button: open the recognition UI; the result arrives in
        // onActivityResult with request code 1.
        final View.OnClickListener recognizeOnTap = new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                Intent recognize = new Intent("com.baidu.action.RECOGNIZE_SPEECH");
                // Grammar file shipped in the app's assets.
                // NOTE(review): the wake-word path in onStart spells this scheme
                // "asset:///" while this one uses "assets:///" - confirm which
                // spelling the SDK expects.
                recognize.putExtra("grammar", "assets:///baidu_speech_grammar.bsg");
                startActivityForResult(recognize, 1);
            }
        };
        floatingActionButton.setOnClickListener(recognizeOnTap);

        // Settings button: intentionally does nothing yet.
        fab_setting.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
            }
        });

        initEnv();
        initTTS();
        speak("欢迎使用语音助手,请点击按钮或说出唤醒词启动语音识别");
    }

    /**
     * Creates the wake-word ("wp") event manager and registers its listener the
     * first time the activity is started.
     *
     * <p>The manager is created only once: the previous code built a fresh
     * manager and listener on every start while never releasing the old one.
     * Actually starting wake-word detection is done in {@link #onResume()} so
     * that it is symmetrical with the stop issued in {@link #onPause()}.
     */
    @Override
    protected void onStart() {
        super.onStart();

        if (eventManager != null) {
            return;
        }

        // Create the wake-word event manager.
        eventManager = EventManagerFactory.create(MainActivity.this, "wp");
        // Register the wake-word event listener.
        eventManager.registerListener(new EventListener() {
            @Override
            public void onEvent(String name, String params, byte[] bytes, int i, int i1) {
                if (!"wp.data".equals(name)) {
                    // "wp.exit" and any other event need no handling here.
                    return;
                }

                Toast.makeText(MainActivity.this, "唤醒成功", Toast.LENGTH_SHORT).show();
                speak("唤醒成功");

                // Give the spoken confirmation 1.5 s before opening the recognizer.
                // Posting through the view queue avoids spawning a new Timer
                // thread per wake event and runs the launch on the UI thread.
                textResult.postDelayed(new Runnable() {
                    @Override
                    public void run() {
                        if (isFinishing()) {
                            return;
                        }
                        Intent intent = new Intent("com.baidu.action.RECOGNIZE_SPEECH");
                        // Grammar file shipped in the app's assets.
                        // NOTE(review): onCreate spells this scheme "assets:///";
                        // confirm which spelling the SDK expects.
                        intent.putExtra("grammar", "asset:///baidu_speech_grammar.bsg");
                        startActivityForResult(intent, 1);
                    }
                }, 1500);
            }
        });
    }

    /**
     * Starts wake-word detection every time the activity returns to the
     * foreground, mirroring the stop in {@link #onPause()}. Previously detection
     * was started in onStart only, so a pause/resume cycle without a stop (for
     * example a dialog-style activity on top) left it switched off.
     */
    @Override
    protected void onResume() {
        super.onResume();

        if (eventManager == null) {
            return;
        }
        HashMap<String, Object> params = new HashMap<String, Object>();
        params.put("kws-file", "assets:///WakeUp.bin");
        eventManager.send("wp.start", new JSONObject(params).toString(), null, 0, 0);
    }

    /**
     * Stops wake-word detection while the activity is not in the foreground.
     */
    @Override
    protected void onPause() {
        super.onPause();
        // Stop the wake-word function.
        if (eventManager != null) {
            eventManager.send("wp.stop", null, null, 0, 0);
        }
    }

    /**
     * Receives the result of the speech-recognition activity (request code 1),
     * stores the recognized text in {@code app} and dispatches it to
     * {@link #analy()}.
     *
     * <p>Nothing is dispatched when recognition was cancelled or failed, or when
     * no text came back. Previously {@code analy()} ran unconditionally, which
     * either crashed on a null command or replayed the previous command.
     *
     * @param requestCode request code passed to {@code startActivityForResult}
     * @param resultCode  {@code RESULT_OK} when recognition succeeded
     * @param data        result intent carrying the "results_recognition" list; may be null
     */
    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        super.onActivityResult(requestCode, resultCode, data);

        if (requestCode != 1 || resultCode != RESULT_OK || data == null) {
            return;
        }
        Bundle extras = data.getExtras();
        if (extras == null) {
            return;
        }
        ArrayList<String> candidates = extras.getStringArrayList("results_recognition");
        if (candidates == null || candidates.isEmpty()) {
            return;
        }

        // Use the first candidate directly instead of slicing the list's
        // toString() output, which glued all candidates together with ", ".
        String best = candidates.get(0);
        if (best == null || best.trim().length() == 0) {
            return;
        }
        app = best;
        analy();
    }

    /** First run of digits in a spoken command; used as the phone number. */
    private static final Pattern COMMAND_DIGITS = Pattern.compile("\\d+");

    /**
     * Analyzes the last recognized command (field {@code app}) and performs the
     * matching action.
     *
     * <p>Keywords are tested in a fixed order and the first match wins: web
     * search, dial, SMS, camera, gallery, sound recorder, market search,
     * contacts, music, video, then the known third-party apps. If nothing
     * matches, a command containing "打开" becomes a market search for the named
     * app and anything else becomes a web search for the whole utterance.
     *
     * <p>Does nothing when no command has been recognized yet.
     */
    @TargetApi(Build.VERSION_CODES.ECLAIR)
    private void analy(){
        if (app == null) {
            return;
        }

        if (app.contains("搜索")) {
            String query = textAfter("搜索");
            Intent intent = new Intent(Intent.ACTION_WEB_SEARCH);
            intent.putExtra(SearchManager.QUERY, query);
            startAndAnnounce(intent, "正在搜索" + query);

        } else if (app.contains("打电话")) {
            String number = firstNumber();
            if (number == null) {
                // No digits were spoken, so there is nothing to dial.
                speak("系统出错");
            } else {
                Intent intent = new Intent(Intent.ACTION_DIAL, Uri.parse("tel:" + number));
                startAndAnnounce(intent, "打电话给" + number);
            }

        } else if (app.contains("发短信")) {
            String number = firstNumber();
            if (number == null) {
                speak("系统出错");
            } else {
                Intent intent = new Intent(Intent.ACTION_SENDTO, Uri.parse("smsto:" + number));
                intent.putExtra("sms_body", "TheSMS text");
                startAndAnnounce(intent, "发短信给" + number);
            }

        } else if (app.contains("相机") || app.contains("照相")) {
            // Announce once, after the camera was actually started.
            startAndAnnounce(new Intent("android.media.action.STILL_IMAGE_CAMERA"), "正在打开相机");

        } else if (app.contains("图片") || app.contains("照片") || app.contains("图库")) {
            Intent intent = new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.EXTERNAL_CONTENT_URI);
            startAndAnnounce(intent, null);

        } else if (app.contains("录音")) {
            startAndAnnounce(new Intent(MediaStore.Audio.Media.RECORD_SOUND_ACTION), "正在录音");

        } else if (app.contains("安装")) {
            String name = textAfter("安装");
            startAndAnnounce(marketSearchIntent(name), "正在安装" + name);

        } else if (app.contains("联系人")) {
            Intent intent = new Intent(Intent.ACTION_PICK, ContactsContract.Contacts.CONTENT_URI);
            startAndAnnounce(intent, "正在打开联系人列表");

        } else if (app.contains("音乐")) {
            // The former playlist-chooser path compared Intent.createChooser()'s
            // result to its argument by identity, which is never true, so the
            // music-player action below is what has always been launched.
            startAndAnnounce(new Intent("android.intent.action.MUSIC_PLAYER"), "正在打开音乐播放器");

        } else if (app.contains("视频")) {
            // NOTE(review): this views item "1" of the *audio* internal store for
            // a video command - looks unintended; confirm the desired target.
            Uri uri = Uri.withAppendedPath(MediaStore.Audio.Media.INTERNAL_CONTENT_URI, "1");
            startAndAnnounce(new Intent(Intent.ACTION_VIEW, uri), "打开视频播放器");

        } else if (app.contains("微信")) {
            announceAndLaunch("正在打开微信", weixinp);
        } else if (app.contains("QQ")) {
            announceAndLaunch("正在打开QQ", qqp);
        } else if (app.contains("知乎")) {
            announceAndLaunch("正在打开知乎", zhihup);
        } else if (app.contains("贴吧")) {
            announceAndLaunch("正在打开百度贴吧", tiebap);
        } else if (app.contains("地图")) {
            announceAndLaunch("正在打开地图", baidumapp);
        } else if (app.contains("购物") || app.contains("买东西")) {
            // Prefer whichever shopping app is installed.
            if (isAppInstalled(taobao)) {
                announceAndLaunch("正在打开淘宝", taobao);
            } else if (isAppInstalled(tianmao)) {
                announceAndLaunch("正在打开天猫", tianmao);
            } else if (isAppInstalled(jingdong)) {
                announceAndLaunch("正在打开京东", jingdong);
            } else {
                // None installed: hand Taobao to turnApp so its install flow
                // runs, instead of passing a null package name.
                announceAndLaunch(null, taobao);
            }
        } else if (app.contains("微博")) {
            announceAndLaunch(null, weibo);
        } else if (app.contains("词典") || app.contains("翻译")) {
            announceAndLaunch("正在打开词典", transfor);
        } else if (app.contains("火车") || app.contains("车票")) {
            announceAndLaunch("正在打开12306", train);
        } else if (app.contains("直播")) {
            announceAndLaunch("正在打开斗鱼", zhibo);
        } else if (app.contains("京东")) {
            announceAndLaunch("正在打开京东", jingdong);
        } else if (app.contains("天猫")) {
            announceAndLaunch("正在打开天猫", tianmao);
        } else if (app.contains("淘宝")) {
            announceAndLaunch("正在打开淘宝", taobao);
        } else if (app.contains("日历")) {
            announceAndLaunch(null, calendar);
        } else if (app.contains("闹钟")) {
            announceAndLaunch(null, clock);
        } else if (app.contains("计算器")) {
            announceAndLaunch(null, calcu);

        } else if (app.contains("打开")) {
            // Unknown app: look it up in the market.
            String name = textAfter("打开");
            startAndAnnounce(marketSearchIntent(name), "请先安装应用" + name);

        } else {
            // Nothing matched: web-search the whole utterance.
            Intent intent = new Intent(Intent.ACTION_WEB_SEARCH);
            intent.putExtra(SearchManager.QUERY, app);
            startAndAnnounce(intent, "正在搜索" + app);
        }
    }

    /**
     * Returns the part of the command that follows {@code keyword}, trimmed.
     * Works wherever the keyword occurs, not only at the start of the command.
     */
    private String textAfter(String keyword) {
        int at = app.indexOf(keyword);
        if (at < 0) {
            return app;
        }
        return app.substring(at + keyword.length()).trim();
    }

    /** Returns the first run of digits in the command, or null when there is none. */
    private String firstNumber() {
        Matcher matcher = COMMAND_DIGITS.matcher(app);
        return matcher.find() ? matcher.group() : null;
    }

    /** Builds a market search intent for {@code name}, URL-encoding the query. */
    private Intent marketSearchIntent(String name) {
        Uri uri = Uri.parse("market://search?q=" + Uri.encode(name));
        return new Intent(Intent.ACTION_VIEW, uri);
    }

    /**
     * Starts {@code intent} and then speaks {@code announcement} (skipped when
     * null). If the intent cannot be started the failure is logged and the
     * generic error phrase is spoken instead.
     */
    private void startAndAnnounce(Intent intent, String announcement) {
        try {
            startActivity(intent);
            if (announcement != null) {
                speak(announcement);
            }
        } catch (RuntimeException e) {
            Log.e(TAG, "Could not start " + intent, e);
            speak("系统出错");
        }
    }

    /** Speaks {@code announcement} (skipped when null), then launches the app by package name. */
    private void announceAndLaunch(String announcement, String packagename) {
        if (announcement != null) {
            speak(announcement);
        }
        turnApp(packagename);
    }

    /**
     * Executes a launch command: opens the app identified by {@code packagename}
     * through its launcher intent.
     *
     * <p>When the package has no launch intent (typically because it is not
     * installed) or the launch fails, the "not installed" phrase is spoken and
     * the market page for the package is opened via {@link #install(String)}.
     * The missing-app case is now detected with an explicit null check instead
     * of relying on the exception thrown by {@code startActivity(null)}.
     *
     * @param packagename package name of the app to open; a null value is
     *                    reported as not installed without opening the market
     */
    @TargetApi(Build.VERSION_CODES.CUPCAKE)
    protected void turnApp(String packagename) {
        if (packagename == null) {
            speak( "应用未安装,请先安装应用");
            return;
        }

        Intent launch = getPackageManager().getLaunchIntentForPackage(packagename);
        if (launch == null) {
            // No launchable activity is known for this package.
            speak( "应用未安装,请先安装应用");
            install(packagename);
            return;
        }

        try {
            startActivity(launch);
        } catch (RuntimeException e) {
            Log.e(TAG, "Could not launch " + packagename, e);
            speak( "应用未安装,请先安装应用");
            install(packagename);
        }
    }

    /**
     * Checks whether an app is installed.
     *
     * @param packagename package name to look up
     * @return true when the package manager returns package info for it,
     *         false when the lookup reports the name as not found
     */
    protected boolean isAppInstalled(String packagename) {
        boolean installed;
        try {
            getPackageManager().getPackageInfo(packagename, 0);
            installed = true;
        } catch (PackageManager.NameNotFoundException notFound) {
            installed = false;
        }
        return installed;
    }

    /**
     * Opens the market details page for {@code packagename} so the user can
     * download the app; speaks an error phrase when no activity can handle the
     * market link.
     */
    protected void install(String packagename) {
        final Intent marketPage =
                new Intent(Intent.ACTION_VIEW, Uri.parse("market://details?id=" + packagename));
        try {
            startActivity(marketPage);
        } catch (ActivityNotFoundException noMarket) {
            speak("没有找到该应用");
        }
    }
    /**
     * Initializes the speech synthesizer: obtains the instance, attaches a
     * no-op listener, sets the offline model paths and speaker parameters, sets
     * the credentials, runs the authorization check (reporting the outcome in a
     * toast) and finally initializes the engine in mixed mode.
     */
    private void initTTS(){
        speechSynthesizer = SpeechSynthesizer.getInstance();
        final SpeechSynthesizer tts = speechSynthesizer;
        tts.setContext(this);

        // Every callback is deliberately empty; nothing reacts to synthesis events.
        tts.setSpeechSynthesizerListener(new SpeechSynthesizerListener() {
            @Override
            public void onSynthesizeStart(String s) {
            }

            @Override
            public void onSynthesizeDataArrived(String s, byte[] bytes, int i) {
            }

            @Override
            public void onSynthesizeFinish(String s) {
            }

            @Override
            public void onSpeechStart(String s) {
            }

            @Override
            public void onSpeechProgressChanged(String s, int i) {
            }

            @Override
            public void onSpeechFinish(String s) {
            }

            @Override
            public void onError(String s, SpeechError speechError) {
            }
        });

        // Offline text model path.
        final String textModelPath = mSampleDirPath + "/" + TTS_TEXT_MODEL_FILE;
        tts.setParam(SpeechSynthesizer.PARAM_TTS_TEXT_MODEL_FILE, textModelPath);
        // Offline acoustic model (the female voice file).
        final String speechModelPath = mSampleDirPath + "/" + TTS_SPEECH_MODEL_FILE;
        tts.setParam(SpeechSynthesizer.PARAM_TTS_SPEECH_MODEL_FILE, speechModelPath);
        // Speaker selection.
        tts.setParam(SpeechSynthesizer.PARAM_SPEAKER, "0");
        tts.setParam(SpeechSynthesizer.PARAM_MIX_MODE, SpeechSynthesizer.MIX_MODE_HIGH_SPEED_SYNTHESIZE);

        // Fill in the app id and keys issued to you.
        // NOTE(review): credentials are hard-coded in source; consider moving
        // them out of the code base.
        tts.setAppId("9981566");
        tts.setApiKey("gT0SvLuw0qYVt6MIHdYEwSov", "0cea5edd47a78f511f9ab95afff88480");

        // Authorization check.
        final AuthInfo authInfo = tts.auth(TtsMode.MIX);
        if (!authInfo.isSuccess()) {
            Toast.makeText(MainActivity.this, "connected failed", Toast.LENGTH_SHORT).show();
            final String errorMsg = authInfo.getTtsError().getDetailMessage();
            Toast.makeText(this, errorMsg, Toast.LENGTH_SHORT).show();
        } else {
            Toast.makeText(MainActivity.this, "connected successed", Toast.LENGTH_SHORT).show();
        }

        // Initialize the engine.
        tts.initTts(TtsMode.MIX);
    }

    /**
     * Speaks {@code text} through the synthesizer and shows a toast when the
     * synthesizer returns a negative status code.
     */
    protected void speak(String text) {
        final int status = speechSynthesizer.speak(text);
        if (status >= 0) {
            return;
        }
        Toast.makeText(MainActivity.this, "error to speak", Toast.LENGTH_SHORT).show();
    }

    /**
     * Resolves the "ASR_TTS" directory under external storage, remembers its
     * path in {@code mSampleDirPath} and creates it when missing. Does nothing
     * if the path has already been set.
     */
    private void initEnv(){
        if (mSampleDirPath != null) {
            return;
        }

        final String storageRoot = Environment.getExternalStorageDirectory().toString();
        mSampleDirPath = storageRoot + "/" + "ASR_TTS";

        final File sampleDir = new File(mSampleDirPath);
        if (!sampleDir.exists()) {
            sampleDir.mkdirs();
        }
    }

}

bdspeech_digital_layout.xml

<?xml version="1.0" encoding="utf-8"?>
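<!--
When modifying this file, note that the continuous on-screen-results UE requirement allows the layout to expand upward only,
so the adjustView method in AppWidgetTargetActivity modifies the layout:
it sets the root view's gravity to bottom and computes and sets paddingBottom so that the layout only expands upward.
-->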
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:gravity="center" >

    <RelativeLayout
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_marginLeft="14dp"
        android:layout_marginRight="14dp"
        android:tag="bg_layout" >

        <RelativeLayout
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" >

            <LinearLayout
                android:layout_width="match_parent"
                android:layout_height="wrap_content"
                android:orientation="vertical"
                android:tag="main_reflect" >

                <LinearLayout
                    android:layout_width="match_parent"
                    android:layout_height="wrap_content"
                    android:orientation="vertical"
                    android:tag="recognizing_reflect" >

                    <FrameLayout
                        android:id="@+id/tips_container"
                        android:layout_width="match_parent"
                        android:layout_height="wrap_content"
                        android:orientation="vertical"
                        android:paddingLeft="20dp"
                        android:paddingRight="20dp"
                        android:paddingTop="20dp" >

                        <TextView
                            android:layout_width="match_parent"
                            android:layout_height="wrap_content"
                            android:background="@null"
                            android:gravity="center"
                            android:tag="tips_text"
                            android:textColor="#3e3e3e"
                            android:textSize="26sp" />

                        <TextView
                            android:layout_width="match_parent"
                            android:layout_height="wrap_content"
                            android:background="@null"
                            android:gravity="center"
                            android:tag="tips_wait_net"

                            android:textColor="#424242"
                            android:textSize="18sp" />

                        <EditText
                            android:layout_width="match_parent"
                            android:layout_height="wrap_content"
                            android:background="@null"
                            android:cursorVisible="true"
                            android:hint="@null"
                            android:inputType="none"
                            android:maxLines="3"
                            android:paddingTop="14dp"
                            android:tag="partial_text"
                            android:textColor="#3e3e3e"
                            android:textSize="18sp"
                            android:visibility="gone" />
                    </FrameLayout>

                    <com.baidu.voicerecognition.android.ui.SDKAnimationView
                        android:layout_width="match_parent"
                        android:layout_height="wrap_content"
                        android:layout_marginLeft="10dp"
                        android:layout_marginRight="10dp"
                        android:tag="voicewave_view" />

                    <!--  -->

                    <TextView
                        android:layout_width="match_parent"
                        android:layout_height="wrap_content"
                        android:background="@null"
                        android:gravity="left"
                        android:paddingBottom="5dp"
                        android:paddingLeft="10dip"
                        android:tag="logo_1"
                        android:textColor="#c1c1c1"
                        android:textSize="14sp" />

                    <TextView
                        android:layout_width="match_parent"
                        android:layout_height="wrap_content"
                        android:background="@null"
                        android:gravity="center_horizontal"
                        android:paddingBottom="5dp"
                        android:tag="suggestion_tips"
                        android:textColor="#c1c1c1"
                        android:textSize="14sp"
                        android:visibility="gone" />
                </LinearLayout>

                <com.baidu.voicerecognition.android.ui.SDKProgressBar
                    android:layout_width="match_parent"
                    android:layout_height="wrap_content"
                    android:tag="progress" />
                <!--
                     <ProgressBar
                    android:tag="progress"
                    style="?android:attr/progressBarStyleHorizontal"
                    android:layout_width="fill_parent"
                    android:layout_height="3dp"
                    android:max="100"
                    android:progress="0"
                    android:visibility="invisible" />
                -->

                <TextView
                    android:layout_width="match_parent"
                    android:layout_height="49dp"
                    android:gravity="center"
                    android:tag="speak_complete"
                    android:textSize="18sp" />
            </LinearLayout>

            <LinearLayout
                android:layout_width="match_parent"
                android:layout_height="wrap_content"
                android:layout_marginBottom="49dp"
                android:layout_marginLeft="16dp"
                android:layout_marginRight="16dp"
                android:orientation="vertical"
                android:tag="help_reflect"
                android:visibility="invisible" >

                <TextView
                    android:layout_width="match_parent"
                    android:layout_height="wrap_content"
                    android:layout_marginBottom="12dp"
                    android:layout_marginTop="16dp"
                    android:tag="help_title"
                    android:textSize="18sp"
                    android:textStyle="bold" />

                <ListView
                    android:layout_width="match_parent"
                    android:layout_height="0dp"
                    android:layout_weight="1"
                    android:divider="@null"
                    android:fadingEdge="vertical"
                    android:fadingEdgeLength="10dp"
                    android:listSelector="@android:color/transparent"
                    android:overScrollMode="never"
                    android:requiresFadingEdge="vertical"
                    android:tag="suggestions_list" >
                </ListView>
            </LinearLayout>

            <LinearLayout
                android:layout_width="match_parent"
                android:layout_height="wrap_content"
                android:orientation="vertical"
                android:tag="error_reflect"
                android:visibility="invisible" >

                <TextView
                    android:layout_width="wrap_content"
                    android:layout_height="0dp"
                    android:layout_weight="1"
                    android:background="@null"
                    android:gravity="center" />

                <TextView
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    android:layout_gravity="center"
                    android:layout_marginLeft="14dp"
                    android:layout_marginRight="14dp"
                    android:background="@null"
                    android:gravity="center"
                    android:tag="error_tips"
                    android:textColor="#3c3c3c"
                    android:textSize="19sp" />

                <TextView
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    android:layout_gravity="center"
                    android:layout_marginLeft="14dp"
                    android:layout_marginRight="14dp"
                    android:background="@null"
                    android:gravity="center"
                    android:tag="suggestion_tips_2"
                    android:textColor="#3c3c3c"
                    android:textSize="19sp" />

                <TextView
                    android:layout_width="wrap_content"
                    android:layout_height="0dp"
                    android:layout_weight="1" />
                <!--  -->

                <TextView
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    android:layout_gravity="left"
                    android:background="@null"
                    android:paddingBottom="5dp"
                    android:paddingLeft="10dip"
                    android:tag="logo_2"
                    android:textColor="#c1c1c1"
                    android:textSize="14sp" />

                <LinearLayout
                    android:layout_width="match_parent"
                    android:layout_height="49dp"
                    android:orientation="horizontal" >

                    <TextView
                        android:layout_width="0dp"
                        android:layout_height="match_parent"
                        android:layout_weight="1"
                        android:gravity="center"
                        android:tag="cancel_text_btn"
                        android:textSize="18sp" />

                    <TextView
                        android:layout_width="0dp"
                        android:layout_height="match_parent"
                        android:layout_weight="1"
                        android:gravity="center"
                        android:tag="retry_text_btn"
                        android:textSize="18sp" />
                </LinearLayout>
            </LinearLayout>
        </RelativeLayout>

        <ImageButton
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_alignParentRight="true"
            android:background="@null"
            android:contentDescription="@null"
            android:padding="14dp"
            android:tag="cancel_btn" />

        <ImageButton
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_alignParentLeft="true"
            android:background="@null"
            android:contentDescription="@null"
            android:padding="12dp"
            android:tag="help_btn" />
    </RelativeLayout>

</RelativeLayout>

activity_main.xml

<?xml version="1.0" encoding="utf-8"?>
<!--
    Layout for MainActivity: a full-size TextView with two floating action
    buttons stacked on top of it. FrameLayout simply layers its children, so
    the former android:orientation attribute (a LinearLayout attribute that
    FrameLayout ignores) has been removed.
-->
<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    android:id="@+id/activity_main"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context="com.example.ruanyulin.asr_tts.MainActivity">

    <!-- Text view that fills the whole frame (id: texx). -->
    <TextView
        android:layout_width="match_parent"
        android:layout_height="match_parent"

        android:id="@+id/texx"
        android:layout_marginLeft="2dp"
        android:layout_gravity="center"/>

    <!-- Primary button, anchored bottom-centre, showing the platform "speak now" icon. -->
    <android.support.design.widget.FloatingActionButton
        android:id="@+id/fab"
        app:elevation="6dp"
        app:fabSize="auto"
        app:backgroundTint="#d1d1d1"
        app:pressedTranslationZ="10dp"
        app:rippleColor="#949494"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"

        android:layout_gravity="bottom|center"
        android:layout_margin="15dp"
        app:srcCompat="@android:drawable/ic_btn_speak_now"
         />

    <!-- Secondary mini button, anchored at the end edge, showing the platform "help" icon. -->
    <android.support.design.widget.FloatingActionButton
        android:id="@+id/fab_setting"
        app:elevation="6dp"
        app:fabSize="mini"
        app:backgroundTint="#fcfcfc"
        app:pressedTranslationZ="10dp"
        app:rippleColor="#949494"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"

        android:layout_gravity="end"
        android:layout_margin="8dp"
        app:srcCompat="@android:drawable/ic_menu_help"
        />

</FrameLayout>

大概就是这样了
这是完整代码github

2018-10-10 19:01:28 m0_37605956 阅读数 404
  • 基于模板的文字识别结果结构化处理技术

    OCR(文字识别)技术是目前常用的一种AI能力。但一般OCR的识别结果是一种按行输出的半结构化输出。本课程从百度自定义模板文字识别展开,从理论到案例,详细介绍OCR结构化的相关技术,并理清OCR和结构化之间的关系和适用场景。

    1200 人正在学习 去看看 CSDN讲师

摘要: 百度语音识别demo接入至vr设备:
项目所需,要测试百度语音识别对某些特定指令语言在vr设备上的识别正确率,需要首先研究百度语音识别的demo使用,特将整个学习过程记录:

一:百度语音android sdk下载地址
http://ai.baidu.com/sdk#asr
我下载了“离在线融合SDK”的android版本
上述链接的页面点击"使用说明"链接,进入SDK说明页面:
http://ai.baidu.com/docs#/ASR-Android-SDK/top

SDK下载后,参考了两个重要的文档,
第一个是目录下的readme_README_IMPORTANT
第二个是doc_integration_DOCUMENT目录下的"ASR-INTEGRATION-helloworld-V2.0",

二:根据上述文档集成百度asr至helloworld工程
由于下载下来的sdk直接导入到android studio中没有编译通过,所以直接新建项目集成语音识别功能,依据ASR-INTEGRATION-helloworld-V2.0文档,

首先新建一个android helloworld工程:
为了节约时间,没有在官网中注册应用。使用百度demo的各个参数。在工程新建过程中,完全按文档中的进行,包括company domain, appId,appkey,secretkey以及applicationId。在选择activity模板时候,选择了"Empty activity",android的版本号选择了默认的15

其次:工程建好编译通过后,按文档导入原demo的core模块并设置app依赖core,编译时候出现以下错误:
Android dependency ‘com.android.support:appcompat-v7’ has different version for the compile (26.1.0) and runtime (27.1.1) classpath. You should manually set the same version via DependencyResolution

网上搜索原因发现:
是由于app和core两个模块使用的依赖库(appcompat-v7)的版本号不同导致,
app的gradle:
dependencies {
    implementation fileTree(include: ['*.jar'], dir: 'libs')
    implementation 'com.android.support:appcompat-v7:26.1.0'
    implementation 'com.android.support.constraint:constraint-layout:1.1.3'
    testImplementation 'junit:junit:4.12'
    androidTestImplementation 'com.android.support.test:runner:1.0.2'
    androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
    implementation project(':core')
}
core的gradle:
dependencies {
    api fileTree(include: ['*.jar'], dir: 'libs')
    implementation 'com.android.support:appcompat-v7:27.1.1'
}

于是修改了app模块的库版本至:27.1.1,与core保持一致。
同时修改app模块build.gradle里的compileSdkVersion 27,targetSdkVersion 27,之后编译通过。

之后修改mainactivity如下:
/**
 * Entry activity of the hello-world project.
 *
 * <p>It adds no members of its own; everything it does is inherited from
 * {@code ActivityMiniRecog}.
 */
public class MainActivity extends ActivityMiniRecog {
    // Intentionally empty: all behaviour comes from the superclass.
}

编译后安装到android手机可以进行语音识别

三: 将该demo移植至vr设备:
需要修改manifest,在MainActivity的intent-filter下增加:
<category android:name="com.***.intent.category.VRAPP" />
这样安装后可以看到该AP

四: vr设备如何测试:
由于不能直接点击,所以在使用controller打开AP后,需要一个辅助软件vysor进行投屏,然后在电脑上面进行点击测试操作。

2019-12-09 17:42:10 jianghao_boke 阅读数 127
  • 基于模板的文字识别结果结构化处理技术

    OCR(文字识别)技术是目前常用的一种AI能力。但一般OCR的识别结果是一种按行输出的半结构化输出。本课程从百度自定义模板文字识别展开,从理论到案例,详细介绍OCR结构化的相关技术,并理清OCR和结构化之间的关系和适用场景。

    1200 人正在学习 去看看 CSDN讲师

python实现语音识别

我们用到的是百度语音识别,因为不用掏钱哈哈!首先去百度官网创建应用,拿到你的 APPID、AK、SK。这方面网上的教程很多,还没创建的可以自己查一下。

目前本SDK的功能同REST API,需要联网调用http接口, 具体功能见REST API 文档。REST API 仅支持整段语音识别的模式,即需要上传完整语音文件进行识别,时长不超过60s,支持自定义词库设置, 没有其他额外功能。
原始 PCM 的录音参数必须符合 16k 采样率、16bit 位深、单声道,支持的格式有:pcm(不压缩)、wav(不压缩,pcm编码)、amr(压缩格式)

首先安装使用Python SDK有如下方式

如果已安装pip,执行pip install baidu-aip即可。
如果已安装setuptools,执行python setup.py install即可

AipSpeech是语音识别的Python SDK客户端,为使用语音识别的开发人员提供了一系列的交互方法。

参考如下代码新建一个AipSpeech:

# AipSpeech is the speech client class shipped in the baidu-aip package.
from aip import AipSpeech

# Placeholder credentials: replace all three values with those of your own
# application before running.
""" 你的 APPID AK SK """
APP_ID = '你的 App ID'
API_KEY = '你的 Api Key'
SECRET_KEY = '你的 Secret Key'

# Client object used by the snippets below to call the recognition API.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

注意:如果导入 aip 包时报错,可能是安装的版本有问题,可以先卸载再重新安装:

pip uninstall baidu-aip
pip install baidu-aip

API的具体代码

# Read a file
def get_file_content(filePath):
    """Return the complete contents of ``filePath`` as a bytes object."""
    with open(filePath, mode='rb') as audio_file:
        payload = audio_file.read()
    return payload

# Recognize a local file: send the raw bytes of audio.pcm to the service,
# declared as 'pcm' audio with a 16000 Hz sample rate.
client.asr(get_file_content('audio.pcm'), 'pcm', 16000, {
    # NOTE(review): dev_pid presumably selects the recognition model/language;
    # confirm what 1536 means against the Baidu ASR documentation.
    'dev_pid': 1536,
})

结果

// 成功返回
{
    "err_no": 0,
    "err_msg": "success.",
    "corpus_no": "15984125203285346378",
    "sn": "481D633F-73BA-726F-49EF-8659ACCC2F3D",
    "result": ["北京天气"]
}

// 失败返回
{
    "err_no": 2000,
    "err_msg": "data empty.",
    "sn": null
}

自己写的一段测试小代码喜欢的可以看看

from aip import AipSpeech


class BaiduVoiceToTxt(object):
    """Send one local audio file to a speech-recognition client and print the text.

    Args:
        client: object exposing ``asr(speech, format, rate, options)``; the
            script below passes an ``AipSpeech`` instance.
        file_path: path of the audio file whose bytes are uploaded.
    """

    def __init__(self, client, file_path):
        self.client = client
        self.file_path = file_path

    # Read a file
    @staticmethod
    def get_file_content(file_path):
        """Return the complete contents of ``file_path`` as bytes."""
        with open(file_path, 'rb') as fp:
            return fp.read()

    # Recognize the local file
    def run(self):
        """Upload the file as 16 kHz PCM and print the first recognition result.

        A response without a usable ``result`` list (the failure shape, which
        only carries ``err_no``/``err_msg``) is reported instead of raising
        ``KeyError``.
        """
        print('正在识别.....')
        # Use the client given to the constructor; the previous code silently
        # relied on a module-level variable that happened to be named `client`.
        result = self.client.asr(self.get_file_content(self.file_path), 'pcm', 16000, {
            'dev_pid': 1536,
        })

        if not isinstance(result, dict):
            # NOTE(review): kept from the original; this branch looks copied
            # from a speech-synthesis example, confirm asr() can ever return
            # a non-dict before relying on it.
            with open('testAudio.mp3', 'wb') as f:
                f.write(result)
            return

        candidates = result.get('result')
        if candidates:
            print('语音内容:{}'.format(candidates[0]))
        else:
            print('识别失败:{} (err_no={})'.format(result.get('err_msg'), result.get('err_no')))


if __name__ == '__main__':
    import os

    # Credentials come from the environment so that real keys are never
    # published together with the script. The fallbacks are placeholders and
    # must be replaced with the values of your own application.
    APP_ID = os.environ.get('BAIDU_APP_ID', '你的 App ID')
    API_KEY = os.environ.get('BAIDU_API_KEY', '你的 Api Key')
    SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', '你的 Secret Key')
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    # Raw string: '\c' and '\z' are not valid escape sequences, so the plain
    # literal only worked by accident and triggers a warning on newer Pythons.
    file_path = r'D:\code\zuj_jian/test.pcm'

    recognizer = BaiduVoiceToTxt(client, file_path)
    recognizer.run()
2018-11-14 15:49:05 Virgil_K2017 阅读数 405
  • 基于模板的文字识别结果结构化处理技术

    OCR(文字识别)技术是目前常用的一种AI能力。但一般OCR的识别结果是一种按行输出的半结构化输出。本课程从百度自定义模板文字识别展开,从理论到案例,详细介绍OCR结构化的相关技术,并理清OCR和结构化之间的关系和适用场景。

    1200 人正在学习 去看看 CSDN讲师

Android 百度语音识别集成,非常简单
1.最近做一个语音识别的项目,网上找了个demo,很简单,直接调用这个类就可以,做个记录,方便以后用!

public class OnLineUtils implements EventListener {

private EventManager asr;
private OnLineCallBack onLineCallBack;

public OnLineUtils (Context context, OnLineCallBack onLineCallBack){
    asr = EventManagerFactory.create(context,"asr");
    asr.registerListener(this);

    this.onLineCallBack = onLineCallBack;
    initPermission(context);
}

/*
 * android 6.0 以上需要动态申请权限
 */
private void initPermission(Context context) {
    String permissions[] = {Manifest.permission.RECORD_AUDIO,
            Manifest.permission.ACCESS_NETWORK_STATE,
            Manifest.permission.INTERNET,
            Manifest.permission.READ_PHONE_STATE,
            Manifest.permission.WRITE_EXTERNAL_STORAGE
    };

    ArrayList<String> toApplyList = new ArrayList<String>();

    for (String perm : permissions) {
        if (PackageManager.PERMISSION_GRANTED != ContextCompat.checkSelfPermission(context, perm)) {
            toApplyList.add(perm);
            // 进入到这里代表没有权限.

        }
    }
    String tmpList[] = new String[toApplyList.size()];
    if (!toApplyList.isEmpty()) {
        ActivityCompat.requestPermissions((Activity) context, toApplyList.toArray(tmpList), 123);
    }

}


@Override
public void onEvent(String name, String params, byte[] data, int offset, int length) {
    if (params != null && !params.isEmpty()) {

        if (name.equals(SpeechConstant.CALLBACK_EVENT_ASR_PARTIAL)) {

            try {
                JSONObject jsonObject = new JSONObject(params);
                String resultType = jsonObject.getString("result_type");
                if (resultType.equals("final_result")){
                    String finalResult = jsonObject.getString("best_result");

                    onLineCallBack.onSuccess(finalResult);
                }
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }

    }

}

public interface OnLineCallBack{
    void onSuccess(String result);
}

/**
 * 开始识别
 */
public void start(){
    Map<String,Object> params = new LinkedHashMap<>();
    String event = SpeechConstant.ASR_START;

    params.put(SpeechConstant.ACCEPT_AUDIO_VOLUME,false);
    String json = new JSONObject(params).toString();

    asr.send(event,json,null,0,0);
}

/**
 * 停止识别
 */
public void stop() {
    asr.send(SpeechConstant.ASR_STOP,null,null,0,0);
}

}

2019-08-20 22:57:48 zhangyuandilove 阅读数 325
  • 基于模板的文字识别结果结构化处理技术

    OCR(文字识别)技术是目前常用的一种AI能力。但一般OCR的识别结果是一种按行输出的半结构化输出。本课程从百度自定义模板文字识别展开,从理论到案例,详细介绍OCR结构化的相关技术,并理清OCR和结构化之间的关系和适用场景。

    1200 人正在学习 去看看 CSDN讲师

百度语音权限token获取地址:https://openapi.baidu.com/oauth/2.0/token

百度语音识别接口地址:http://vop.baidu.com/server_api

按照官方Api指导,第一步获取token,采用post请求方式,js代码如下:

var token;

/**
 * Requests an access token from the Baidu OAuth endpoint using the
 * client-credentials grant and stores it in the shared `token` variable
 * once the JSON response arrives.
 */
function getToken() {
	var credentials = {
		grant_type: 'client_credentials',
		client_id: '个人应用的clientid', // must be replaced
		client_secret: '对应应用的加密串' // must be replaced
	};
	var storeToken = function(response) {
		token = response.access_token;
	};
	$.post("https://openapi.baidu.com/oauth/2.0/token", credentials, storeToken, "json");
}

token获取后,先放一边,我们进行页面语音的录制并播放。

var mediaRecorder; // plus.audio recorder, created on first use
var recordStopTimer = null; // id of the pending 10 s auto-stop timer
var voiceMsg = $("#voice-rec"); // button: hold to talk, release to run speech recognition

// Stops the current recording (if a recorder exists) and cancels the pending
// auto-stop, so a stale timer can never cut off a later recording.
function stopVoiceRecording() {
	if (recordStopTimer !== null) {
		clearTimeout(recordStopTimer);
		recordStopTimer = null;
	}
	if (mediaRecorder) {
		mediaRecorder.stop();
	}
}

voiceMsg.on('touchstart', function(ev) {
	$(this).html('松开 结束');
	$(this).addClass("activeBtn");
	$('#voiceInputLoading').show();

	// The recorder was previously declared but never created, so record()
	// was called on undefined. Create it lazily; an instance assigned
	// elsewhere is kept as-is.
	if (!mediaRecorder) {
		mediaRecorder = plus.audio.getRecorder();
	}

	// Limit a single recording to 10 seconds.
	if (recordStopTimer !== null) {
		clearTimeout(recordStopTimer);
	}
	recordStopTimer = setTimeout(stopVoiceRecording, 10000);

	mediaRecorder.record({
		samplerate: "16000",
		format: "amr",
		filename: "_doc/audio/"
	}, function(recordedPath) {
		plus.io.resolveLocalFileSystemURL(recordedPath, function(entry) {
			entry.file(function(file) {
				var size = file.size;
				var reader = new plus.io.FileReader();
				// Attach the handler before starting the read.
				reader.onloadend = function(e) {
					var urlStr = e.target.result;
					// Key step: keep only the part after the comma of the data URL.
					var base64Audio = urlStr.split(",")[1];
					// Call the Baidu API for speech recognition.
					getVoice2Text(base64Audio, size, function(msg) {
						console.log(msg);
					});
				};
				reader.readAsDataURL(file);
			});
		});
	}, function(e) {
		alert("Audio record failed: " + e.message);
	});
});

voiceMsg.on('touchend', function(ev) {
	$(this).html('按下 说话');
	$(this).removeClass("activeBtn");
	$('#voiceInputLoading').hide();
	stopVoiceRecording();
});

以上代码中getVoice2Text这个方法就是调用百度语音识别Api

/**
 * Sends base64-encoded AMR audio to the Baidu speech-recognition endpoint.
 *
 * @param {string} base64     audio content, base64 encoded
 * @param {number} urlSize    value sent as the request's `len` field
 * @param {function(string)} callback  invoked with the first recognition
 *        result; not invoked when the response carries no result
 */
function getVoice2Text(base64, urlSize, callback) {
	$.ajax({
		type: "post",
		url: "http://vop.baidu.com/server_api",
		async: true,
		contentType: "application/json",
		processData: false,
		data: JSON.stringify({
			"format": "amr",
			"rate": 16000,
			"dev_pid": 1536,
			"channel": 1,
			"cuid": "862245234377502,862989243244150", // unique id of the device
			"speech": base64,
			"len": urlSize,
			"token": token // token obtained in the first step
		}),
		success: function(data) {
			var results = data && data.result;
			// A failed recognition has no `result` array; indexing it
			// unconditionally used to throw inside this handler.
			if (!results || !results.length) {
				console.error("Speech recognition failed: " +
					(data ? data.err_no + " " + data.err_msg : "empty response"));
				return;
			}
			console.log(results[0]);
			callback(results[0]);
		},
		error: function(xhr, status, err) {
			console.error("Speech recognition request failed: " + status + " " + err);
		}
	});

}

这里需要注意一下的地方就是以下两点:

1:音频采用的是plus.audio提供的api;

2:音频文件转换采用的是plus.io提供的api

感兴趣的朋友可以参考:http://www.html5plus.org/doc/zh_cn/webview.html

百度语音识别

阅读数 332

参考链接百度文档中心

博文 来自: wangxiaowu1986
没有更多推荐了,返回首页