标题
- 讯飞认证配置
- 封装监听器
- 客户端工具 `Speech2TextClient.java`
- 对外开放接口
- 对外开放接口实现
- 结果
- 参考
根据官方提供的 WebIATWS 工具扩展修改,接入了讯飞的语音听写(STT)服务。
讯飞认证配置
/**
 * Static settings used to reach and authenticate against the iFlytek
 * dictation (IAT) endpoint.
 *
 * <p>NOTE(review): the key, secret and app id are hard-coded in source; the
 * values shown look like placeholders. Consider loading real credentials from
 * external configuration instead of committing them.
 */
public class XFAuthorityConfig {

    /** Application id sent in the {@code common.app_id} field of the first frame. */
    public static final String appid = "5ede17d7";

    /** API key placed into the {@code authorization} query parameter. */
    public static final String apiKey = "xxxx";

    /** API secret used as the HMAC key when signing the request. */
    public static final String apiSecret = "xxx";

    /** Endpoint URL of the dictation service (converted to {@code wss://} by the client). */
    public static final String hostUrl = "https://iat-api.xfyun.cn/v2/iat";
}
封装监听器
public class WrapListener extends WebSocketListener {private static final Logger LOGGER = LoggerFactory.getLogger(WrapListener.class);// 下面三个参数是我根据需要新增的// file 是要听写的音频文件,而language 支持 zh-CN,en-US]// result 是返回结果private InputStream file;private String language;private String result;private String appId = XFConfig.appid;public static final int StatusFirstFrame = 0;public static final int StatusContinueFrame = 1;public static final int StatusLastFrame = 2;public static final Gson json = new Gson();Decoder decoder = new Decoder();// 开始时间private static Date dateBegin = new Date();// 结束时间private static Date dateEnd = new Date();private static final SimpleDateFormat sdf = new SimpleDateFormat("yyy-MM-dd HH:mm:ss.SSS");public InputStream getFile() {return file;}public void setFile(InputStream file) {this.file = file;}public String getLanguage() {return language;}public void setLanguage(String language) {this.language = language;}public String getResult() {return result;}public void setResult(String result) {this.result = result;}@Overridepublic void onOpen(WebSocket webSocket, Response response) {super.onOpen(webSocket, response);int frameSize = 1280;int intervel = 40;int status = 0;try {byte[] buffer = new byte[frameSize];// 发送音频end:while (true) {int len = file.read(buffer);if (len == -1) {status = StatusLastFrame;}switch (status) {case StatusFirstFrame:JsonObject frame = new JsonObject();JsonObject business = new JsonObject()JsonObject common = new JsonObject();JsonObject data = new JsonObject();// 填充commoncommon.addProperty("app_id", appId);//填充businessbusiness.addProperty("language", language);business.addProperty("domain", "iat");business.addProperty("accent", "mandarin");//中文方言请在控制台添加试用,添加后即展示相应参数值business.addProperty("dwa", "wpgs");data.addProperty("status", StatusFirstFrame);data.addProperty("format", "audio/L16;rate=8000");data.addProperty("encoding", "raw");data.addProperty("audio", Base64.getEncoder().encodeToString(Arrays.copyOf(buffer, 
len)));//填充frameframe.add("common", common);frame.add("business", business);frame.add("data", data);webSocket.send(frame.toString());status = StatusContinueFrame; // 发送完第一帧改变status 为 1break;case StatusContinueFrame: //中间帧status = 1JsonObject frame1 = new JsonObject();JsonObject data1 = new JsonObject();data1.addProperty("status", StatusContinueFrame);data1.addProperty("format", "audio/L16;rate=8000");data1.addProperty("encoding", "raw");data1.addProperty("audio", Base64.getEncoder().encodeToString(Arrays.copyOf(buffer, len)));frame1.add("data", data1);webSocket.send(frame1.toString());break;case StatusLastFrame: // 最后一帧音频status = 2 ,标志音频发送结束JsonObject frame2 = new JsonObject();JsonObject data2 = new JsonObject();data2.addProperty("status", StatusLastFrame);data2.addProperty("audio", "");data2.addProperty("format", "audio/L16;rate=8000");data2.addProperty("encoding", "raw");frame2.add("data", data2);webSocket.send(frame2.toString());break end;}Thread.sleep(intervel);}LOGGER.info("all data is send");} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} catch (InterruptedException e) {e.printStackTrace();}}@Overridepublic void onMessage(WebSocket webSocket, String text) {super.onMessage(webSocket, text);ResponseData resp = json.fromJson(text, ResponseData.class);if (resp != null) {if (resp.getCode() != 0) {LOGGER.debug("code=>" + resp.getCode() + " error=>" + resp.getMessage() + " sid=" + resp.getSid());LOGGER.debug("错误码查询链接:https://www.xfyun.cn/document/error-code");return;}if (resp.getData() != null) {if (resp.getData().getResult() != null) {Text te = resp.getData().getResult().getText();try {decoder.decode(te);LOGGER.info("中间识别结果 ==》" + decoder.toString());} catch (Exception e) {e.printStackTrace();}}if (resp.getData().getStatus() == 2) {dateEnd = new Date();LOGGER.info(sdf.format(dateBegin) + "开始");LOGGER.info(sdf.format(dateEnd) + "结束");LOGGER.info("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + 
"ms");LOGGER.info("最终识别结果 ==》" + decoder.toString());this.result = decoder.toString();decoder.discard();webSocket.close(1000, "");} else {// todo 根据返回的数据处理}}}}@Overridepublic void onFailure(WebSocket webSocket, Throwable t, Response response) {super.onFailure(webSocket, t, response);try {if (null != response) {int code = response.code();LOGGER.info("onFailure code:" + code);LOGGER.info("onFailure body:" + response.body().string());if (101 != code) {LOGGER.debug("connection failed");System.exit(0);}}} catch (IOException e) {e.printStackTrace();}}public static class ResponseData {private int code;private String message;private String sid;private Data data;public int getCode() {return code;}public String getMessage() {return this.message;}public String getSid() {return sid;}public Data getData() {return data;}}public static class Data {private int status;private Result result;public int getStatus() {return status;}public Result getResult() {return result;}}public static class Result {int bg;int ed;String pgs;int[] rg;int sn;Ws[] ws;boolean ls;JsonObject vad;public Text getText() {Text text = new Text();StringBuilder sb = new StringBuilder();for (Ws ws : this.ws) {sb.append(ws.cw[0].w);}text.sn = this.sn;text.text = sb.toString();text.sn = this.sn;text.rg = this.rg;text.pgs = this.pgs;text.bg = this.bg;text.ed = this.ed;text.ls = this.ls;text.vad = this.vad == null ? null : this.vad;return text;}}public static class Ws {WebIATWS.Cw[] cw;int bg;int ed;}public static class Cw {int sc;String w;}public static class Text {int sn;int bg;int ed;String text;String pgs;int[] rg;boolean deleted;boolean ls;JsonObject vad;@Overridepublic String toString() {return "Text{" +"bg=" + bg +", ed=" + ed +", ls=" + ls +", sn=" + sn +", text='" + text + '\'' +", pgs=" + pgs +", rg=" + Arrays.toString(rg) +", deleted=" + deleted +", vad=" + (vad == null ? 
"null" : vad.getAsJsonArray("ws").toString()) +'}';}}public static class Decoder {private Text[] texts;private int defc = 10;public Decoder() {this.texts = new Text[this.defc];}public synchronized void decode(Text text) {if (text.sn >= this.defc) {this.resize();}if ("rpl".equals(text.pgs)) {for (int i = text.rg[0]; i <= text.rg[1]; i++) {this.texts[i].deleted = true;}}this.texts[text.sn] = text;}public String toString() {StringBuilder sb = new StringBuilder();for (Text t : this.texts) {if (t != null && !t.deleted) {sb.append(t.text);}}return sb.toString();}public void resize() {int oc = this.defc;this.defc <<= 1;Text[] old = this.texts;this.texts = new Text[this.defc];for (int i = 0; i < oc; i++) {this.texts[i] = old[i];}}public void discard() {for (int i = 0; i < this.texts.length; i++) {this.texts[i] = null;}}}}
客户端工具 `Speech2TextClient.java`
@Component
public class Speech2TextClient {private String authUri;public Speech2TextClient() throws Exception {authUri = Speech2TextClient.getAuthUrl(XFConfig.hostUrl, XFConfig.apiKey, XFConfig.apiSecret);authUri = authUri.replace("http://", "ws://").replace("https://", "wss://");}public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {URL url = new URL(hostUrl);SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);format.setTimeZone(TimeZone.getTimeZone("GMT"));String date = format.format(new Date());StringBuilder builder = new StringBuilder("host: ").append(url.getHost()).append("\n").append("date: ").append(date).append("\n").append("GET ").append(url.getPath()).append(" HTTP/1.1");Charset charset = Charset.forName("UTF-8");Mac mac = Mac.getInstance("hmacsha256");SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(charset), "hmacsha256");mac.init(spec);byte[] hexDigits = mac.doFinal(builder.toString().getBytes(charset));String sha = Base64.getEncoder().encodeToString(hexDigits);String authorization =String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"",apiKey, "hmac-sha256", "host date request-line", sha);HttpUrl httpUrl = HttpUrl.parse("https://" + url.getHost() + url.getPath()).newBuilder().addQueryParameter("authorization",Base64.getEncoder().encodeToString(authorization.getBytes(charset))).addQueryParameter("date", date).addQueryParameter("host", url.getHost()).build();return httpUrl.toString();}public String stt(InputStream is, String language) throws IOException, InterruptedException {OkHttpClient client = new OkHttpClient.Builder().build();Request request = new Request.Builder().url(authUri).build();WrapListener wrapListener = new WrapListener();wrapListener.setFile(is);wrapListener.setLanguage(language);WebSocket ws = client.newWebSocket(request, wrapListener);while (true) {if (wrapListener.getResult() != null) {return 
wrapListener.getResult();}Thread.sleep(500);// 因为监听器是异步的,需要返回结果}}
}
对外开放接口
/**
 * Public HTTP contract for cloud speech services, rooted at {@code /api/web/v2}.
 */
@RequestMapping("/api/web/v2")
public interface CloudService {

    /**
     * Transcribes an uploaded audio file with the iFlytek service.
     *
     * @param language recognition language, taken from the {@code language} request parameter
     * @param audio    the uploaded audio, taken from the {@code audio} multipart part
     * @return the response wrapper produced by the implementation
     */
    @PostMapping(value = "/xf/stt", consumes = "multipart/form-data")
    ResponseResult speech2TextByXf(
            @RequestParam("language") String language,
            @RequestPart("audio") MultipartFile audio);
}
对外开放接口实现
@RestController
public class CloudServiceImpl implements CloudService {/*** xf*/@AutowiredSpeech2TextClient speech2TextClient;@Overridepublic ResponseResult speech2TextByXf(String language, MultipartFile audio) {try {String text = speech2TextClient.stt(audio.getInputStream(), language);if (text == null) {return ResponseUtil.error("转换信息为空");}return ResponseUtil.ok(text);} catch (Exception e) {return ResponseUtil.error();}}}
结果
好了,基本就到这里,具体的调优,去修改监听器和工具就可以了
参考
WebAPI 示例demo汇总(一)----语音听写(流式版)
语音听写(流式版)WebAPI 文档