停更太久了,倒也不是多忙,主要是學習的熱情降低了,又比較懶,因此,即使有做出新的玩意或者有所收穫,也懶得去碼字。最近做了一個百度AI的web端玩具,可以臉部辨識/註冊/顏值打分/手勢識別,最騷的是可以通過語音進行相關指令的控制,大概就長下面這樣。臉部辨識部分包括人臉註冊/人臉1:N識別;顏值打分這塊,在上一篇文章《百度臉部辨識HTTP SDK實戰:基於C# ASP.NET CORE MVC 3.1》中已經講得比較細了,顏值打分就是在之前的程式碼裡多加一個參數,具體看百度官方文件就知道,這裡不再論述。本文主要講解手勢識別以及語音識別部分的程式碼邏輯。
效果圖如下:支援24種手勢,具體:https://cloud.baidu.com/doc/BODY/s/Dk3cpyr8l
這一塊官方文件已經有很好的C#範例了,稍微修改一下就可以用了。
// Shared by every token request: creating (and abandoning) an HttpClient per call
// leaves sockets lingering and can exhaust them under load.
private static readonly HttpClient TokenHttpClient = new HttpClient();

/// <summary>
/// Requests an OAuth access token from Baidu using the client-credentials grant,
/// stores it in <c>AccessToken</c> and returns it.
/// </summary>
/// <returns>The access token string returned by the token endpoint.</returns>
/// <exception cref="InvalidOperationException">
/// The response did not contain an <c>access_token</c> field (for example because
/// the configured API key / secret key were rejected).
/// </exception>
public string GetAccessToken()
{
    const string authHost = "https://aip.baidubce.com/oauth/2.0/token";

    var paraList = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("grant_type", "client_credentials"),
        new KeyValuePair<string, string>("client_id", _configuration["BaiduAiConfig:BaiDuGestureRecon:ApiKey_Gesture"]),
        new KeyValuePair<string, string>("client_secret", _configuration["BaiduAiConfig:BaiDuGestureRecon:SecretKey_Gesture"]),
    };

    // The method signature is synchronous, so the async calls are still blocked on here;
    // the content and response are now disposed deterministically.
    using (var content = new FormUrlEncodedContent(paraList))
    using (HttpResponseMessage response = TokenHttpClient.PostAsync(authHost, content).GetAwaiter().GetResult())
    {
        string result = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
        var resultJson = JsonConvert.DeserializeObject<JObject>(result);

        // Fail with a descriptive error instead of a NullReferenceException when the
        // endpoint answers with an error payload.
        string token = resultJson?["access_token"]?.ToString();
        if (string.IsNullOrEmpty(token))
        {
            throw new InvalidOperationException(
                "Baidu token endpoint returned no access_token (HTTP " + (int)response.StatusCode + ").");
        }

        AccessToken = token;
        return token;
    }
}
/// <summary>
/// Sends a base64-encoded image (as produced by <c>canvas.toDataURL</c>) to the Baidu
/// gesture endpoint and returns the display text mapped to the first recognised gesture.
/// </summary>
/// <param name="imgData64FromAjax">
/// Base64 image data, optionally prefixed with a data-URL header ("data:image/png;base64,").
/// </param>
/// <returns>
/// A JSON string with the mapped gesture text, or "無法識別手勢" when the image is missing,
/// the API reply cannot be interpreted, or the gesture has no mapping. When the API reports
/// zero results the original redirect to home/index is preserved.
/// </returns>
public IActionResult GestureFromWeb(string imgData64FromAjax)
{
    const string unrecognized = "無法識別手勢";

    // A missing form field used to crash with a NullReferenceException.
    if (string.IsNullOrWhiteSpace(imgData64FromAjax))
    {
        return Json(unrecognized);
    }

    GetAccessToken();
    string host = "https://aip.baidubce.com/rest/2.0/image-classify/v1/gesture?access_token=" + AccessToken;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
    request.Method = "post";
    request.KeepAlive = true;
    // The body is a URL-encoded form ("image=..."), so declare it as such.
    request.ContentType = "application/x-www-form-urlencoded";

    // Drop the "data:image/png;base64," header if present; IndexOf returns -1 when there
    // is no comma, which makes Substring(0) keep the whole string.
    string requestImgData64 = imgData64FromAjax.Substring(imgData64FromAjax.IndexOf(',') + 1);
    byte[] buffer = Encoding.UTF8.GetBytes("image=" + HttpUtility.UrlEncode(requestImgData64));
    request.ContentLength = buffer.Length;

    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }

    string result;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        result = reader.ReadToEnd();
    }

    var resultJson = JsonConvert.DeserializeObject<JObject>(result);

    // A reply without a numeric result_num (e.g. an error payload) is treated as
    // "not recognised" instead of throwing.
    if (!int.TryParse(resultJson?["result_num"]?.ToString(), out int resultNum))
    {
        return Json(unrecognized);
    }

    if (resultNum == 0)
    {
        // NOTE(review): the page calls this action via AJAX expecting JSON; a redirect
        // yields HTML there. Kept unchanged to preserve existing behaviour.
        return RedirectToAction("index", "home");
    }

    string gestureToken = resultJson["result"]?.First?["classname"]?.ToString();
    if (string.IsNullOrEmpty(gestureToken))
    {
        return Json(unrecognized);
    }

    GestureResultDict resultDict = new GestureResultDict();
    try
    {
        // Look the API classname up in the gesture -> display-text mapping.
        string resultStr = resultDict.resultDict.FirstOrDefault(x => x.Key == gestureToken).Value;
        return Json(string.IsNullOrWhiteSpace(resultStr) ? unrecognized : resultStr);
    }
    catch (Exception)
    {
        // Deliberate best effort: any lookup failure is reported as "not recognised".
        return Json(unrecognized);
    }
}
// Maps the "classname" returned by the Baidu gesture API to the text shown to the user.
// Classnames without an entry here are reported as unrecognised by the caller.
private Dictionary<string, string> results = new Dictionary<string, string>()
{
    { "Ok", "Ok" },
    { "Six", "數位6" },
    { "Rock", "Rock" },
    { "Thumb_up", "點贊" },
    { "One", "數位1" },
    { "Five", "數位5" },
    { "Fist", "拳頭" },
    { "Prayer", "上天保佑" },
    { "Congratulation", "恭喜恭喜" },
    { "Heart_single", "筆芯" },
    { "Thumb_down", "鄙視你" },
    { "ILY", "黑鳳梨" },
    { "Insult", "豎中指" },
    { "Nine", "數位9" },
    { "Eight", "數位8" },
    { "Seven", "數位7" },
    { "Four", "數位4" },
    // The API classname is "Two"; the previous key "Tow" could never match.
    { "Two", "數位2/Yeah" },
    // "Three" was missing from the digit gestures.
    { "Three", "數位3" }
};
這裡主要是從前端傳來base64的圖片編碼,前端邏輯採用了Jquery 。
/**
 * Grabs the current frame of the #video element onto #canvasGesture, posts it as a
 * base64 PNG data URL to /Gesture/GestureFromWeb and shows the reply in #gestureText.
 */
function reconGesture() {
    var video = document.getElementById("video");
    var canvas = $('#canvasGesture')[0];
    if (!video || !canvas) {
        // Without both elements there is nothing to capture.
        console.log("reconGesture: #video or #canvasGesture not found");
        return;
    }

    var ctx = canvas.getContext('2d');
    // Assigning the canvas size also clears it and resets the context state.
    canvas.height = 465;
    canvas.width = 400;
    ctx.drawImage(video, 0, 0, 400, 400);
    // NOTE(review): this runs after drawImage, so it does not mirror the captured frame;
    // kept as-is so the context state seen by any later drawing is unchanged.
    ctx.scale(-1, 1);

    var img = convertCanvasToImage(canvas);
    $.ajax({
        url: '/Gesture/GestureFromWeb',
        type: 'post',
        dataType: 'json',
        data: { "imgData64FromAjax": img.src },
        success: function (jsonStr) {
            var data = JSON.stringify(jsonStr);
            console.log(data);
            // text() instead of html(): the reply is plain text, never markup.
            $("#gestureText").text("手勢識別結果為:" + data);
        },
        error: function (xhr, status, err) {
            // A failed request or a non-JSON reply (e.g. an HTML page after a redirect)
            // lands here; report it instead of leaving stale text on screen.
            console.log("reconGesture failed: " + status, err);
            $("#gestureText").text("手勢識別結果為:無法識別手勢");
        }
    });
}
/**
 * Snapshots a canvas into a new Image element.
 *
 * @param canvas canvas whose current contents are exported
 * @returns a new Image whose src is the canvas content as a PNG data URL
 */
function convertCanvasToImage(canvas) {
    // toDataURL returns the pixels as a base64-encoded data URL in the requested format.
    var pngDataUrl = canvas.toDataURL("image/png");
    var snapshot = new Image();
    snapshot.src = pngDataUrl;
    return snapshot;
}
語音識別這塊比較麻煩一些。錄音採用了 Recorder.js 作為外掛進行錄音上傳;由於百度語音識別對語音樣本有要求(取樣率 16000 Hz、16 bit 位元深度、單聲道,格式為 pcm/wav/amr 等),因此使用下面這份調整過的 Recorder.js:
(function (f) { if (typeof exports === "object" && typeof module !== "undefined") { module.exports = f() } else if (typeof define === "function" && define.amd) { define([], f) } else { var g; if (typeof window !== "undefined") { g = window } else if (typeof global !== "undefined") { g = global } else if (typeof self !== "undefined") { g = self } else { g = this } g.Recorder = f() } })(function () {
var define, module, exports; return (function e(t, n, r) { function s(o, u) { if (!n[o]) { if (!t[o]) { var a = typeof require == "function" && require; if (!u && a) return a(o, !0); if (i) return i(o, !0); var f = new Error("Cannot find module '" + o + "'"); throw f.code = "MODULE_NOT_FOUND", f } var l = n[o] = { exports: {} }; t[o][0].call(l.exports, function (e) { var n = t[o][1][e]; return s(n ? n : e) }, l, l.exports, e, t, n, r) } return n[o].exports } var i = typeof require == "function" && require; for (var o = 0; o < r.length; o++)s(r[o]); return s })({
1: [function (require, module, exports) {
"use strict";
module.exports = require("./recorder").Recorder;
}, { "./recorder": 2 }], 2: [function (require, module, exports) {
'use strict';
// Transpiler helper: installs method descriptors ({ key, value, ... }) on a constructor's
// prototype (protoProps) and on the constructor itself (staticProps), then returns it.
var _createClass = (function () {
    function installAll(target, descriptors) {
        var idx = 0;
        while (idx < descriptors.length) {
            var entry = descriptors[idx];
            // Methods are non-enumerable unless the descriptor says otherwise.
            entry.enumerable = entry.enumerable || false;
            entry.configurable = true;
            if ("value" in entry) {
                entry.writable = true;
            }
            Object.defineProperty(target, entry.key, entry);
            idx += 1;
        }
    }

    return function (Constructor, protoProps, staticProps) {
        if (protoProps) {
            installAll(Constructor.prototype, protoProps);
        }
        if (staticProps) {
            installAll(Constructor, staticProps);
        }
        return Constructor;
    };
})();
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.Recorder = undefined;
var _inlineWorker = require('inline-worker');
var _inlineWorker2 = _interopRequireDefault(_inlineWorker);
// Transpiler helper: returns ES-module namespace objects unchanged and wraps anything
// else as { default: value } so callers can always read `.default`.
function _interopRequireDefault(obj) {
    if (obj && obj.__esModule) {
        return obj;
    }
    return { default: obj };
}
// Transpiler helper: throws a TypeError when a class constructor runs with a `this`
// that is not an instance of the class (i.e. it was called without `new`).
function _classCallCheck(instance, Constructor) {
    var isInstance = instance instanceof Constructor;
    if (isInstance) {
        return;
    }
    throw new TypeError("Cannot call a class as a function");
}
var Recorder = exports.Recorder = (function () {
function Recorder(source, cfg) {
var _this = this;
_classCallCheck(this, Recorder);
this.config = {
bufferLen: 4096,
numChannels: 2,
mimeType: 'audio_pcm/wav'
};
this.recording = false;
this.callbacks = {
getBuffer: [],
exportWAV: []
};
Object.assign(this.config, cfg);
this.context = source.context;
this.node = (this.context.createScriptProcessor || this.context.createJavaScriptNode).call(this.context, this.config.bufferLen, this.config.numChannels, this.config.numChannels);
this.node.onaudioprocess = function (e) {
if (!_this.recording) return;
var buffer = [];
for (var channel = 0; channel < _this.config.numChannels; channel++) {
buffer.push(e.inputBuffer.getChannelData(channel));
}
_this.worker.postMessage({
command: 'record',
buffer: buffer
});
};
source.connect(this.node);
this.node.connect(this.context.destination); //this should not be necessary
var self = {};
this.worker = new _inlineWorker2.default(function () {
var recLength = 0,
recBuffers = [],
sampleRate = undefined,
numChannels = undefined;
// var sampleStep = this.context.sampleRate / sampleRate;
self.onmessage = function (e) {
switch (e.data.command) {
case 'init':
init(e.data.config);
break;
case 'record':
record(e.data.buffer);
break;
case 'exportWAV':
exportWAV(e.data.type);
break;
case 'getBuffer':
getBuffer();
break;
case 'clear':
clear();
break;
}
};
// Handles the 'init' command: remembers the sample rate and channel count from the
// config message, then creates one empty chunk list per channel.
function init(config) {
    numChannels = config.numChannels;
    sampleRate = config.sampleRate;
    initBuffers();
}
// Handles the 'record' command: appends each channel's chunk to that channel's list and
// adds the chunk length (taken from channel 0) to the running sample count.
function record(inputBuffer) {
    var ch = 0;
    while (ch < numChannels) {
        recBuffers[ch].push(inputBuffer[ch]);
        ch += 1;
    }
    recLength += inputBuffer[0].length;
}
// Handles the 'exportWAV' command: flattens every channel, interleaves them when there
// are exactly two (otherwise uses channel 0 only), encodes the samples as WAV and posts
// the resulting Blob back with the requested MIME type.
function exportWAV(type) {
    var perChannel = [];
    var ch;
    for (ch = 0; ch < numChannels; ch++) {
        perChannel.push(mergeBuffers(recBuffers[ch], recLength));
    }

    var samples = numChannels === 2
        ? interleave(perChannel[0], perChannel[1])
        : extractSingleChannel(perChannel[0]); // mono path

    var wavBlob = new Blob([encodeWAV(samples)], { type: type });
    self.postMessage({ command: 'exportWAV', data: wavBlob });
}
// Handles the 'getBuffer' command: posts back one flattened Float32Array per channel.
function getBuffer() {
    var flattened = [];
    var ch = 0;
    while (ch < numChannels) {
        flattened.push(mergeBuffers(recBuffers[ch], recLength));
        ch += 1;
    }
    self.postMessage({ command: 'getBuffer', data: flattened });
}
// Handles the 'clear' command: discards everything recorded so far and starts over with
// empty per-channel chunk lists.
function clear() {
    recBuffers = [];
    recLength = 0;
    initBuffers();
}
// Gives every channel index below numChannels a fresh, empty chunk list.
function initBuffers() {
    var ch = 0;
    while (ch < numChannels) {
        recBuffers[ch] = [];
        ch += 1;
    }
}
// Concatenates a list of sample chunks into a single Float32Array of recLength samples.
function mergeBuffers(recBuffers, recLength) {
    var merged = new Float32Array(recLength);
    var writePos = 0;
    for (var k = 0, count = recBuffers.length; k < count; k++) {
        var chunk = recBuffers[k];
        merged.set(chunk, writePos);
        writePos += chunk.length;
    }
    return merged;
}
// Interleaves two channels into one array laid out as L0, R0, L1, R1, ...
// The output length is the sum of both input lengths.
function interleave(inputL, inputR) {
    var total = inputL.length + inputR.length;
    var mixed = new Float32Array(total);
    for (var frame = 0; frame * 2 < total; frame++) {
        mixed[frame * 2] = inputL[frame];
        mixed[frame * 2 + 1] = inputR[frame];
    }
    return mixed;
}
// Produces the sample array for a mono recording. The step is fixed at 1, so this is a
// sample-for-sample copy of the input into a new Float32Array of the same length
// (no down-sampling takes place here).
function extractSingleChannel(input) {
    var count = Math.ceil(input.length / 1);
    var copy = new Float32Array(count);
    for (var pos = 0; pos < count; pos++) {
        copy[pos] = input[pos];
    }
    return copy;
}
// Writes float samples into a DataView as little-endian signed 16-bit PCM, starting at
// the given byte offset. Each sample is clamped to [-1, 1]; negative values are scaled by
// 0x8000 and non-negative values by 0x7FFF.
function floatTo16BitPCM(output, offset, input) {
    var writeAt = offset;
    for (var n = 0; n < input.length; n++) {
        var clamped = Math.min(1, Math.max(-1, input[n]));
        var scale = clamped < 0 ? 0x8000 : 0x7FFF;
        output.setInt16(writeAt, clamped * scale, true);
        writeAt += 2;
    }
}
// Writes each character code of `string` as one byte into `view`, starting at `offset`.
function writeString(view, offset, string) {
    var pos = 0;
    while (pos < string.length) {
        view.setUint8(offset + pos, string.charCodeAt(pos));
        pos += 1;
    }
}
// Builds a complete WAV file (44-byte RIFF/WAVE header followed by 16-bit little-endian
// PCM data) from float samples and returns it as a DataView.
// The channel count and sample rate come from the worker's numChannels / sampleRate.
function encodeWAV(samples) {
    var bytesPerSample = 2; // 16-bit PCM
    var blockAlign = numChannels * bytesPerSample;
    var dataLength = samples.length * bytesPerSample;

    var buffer = new ArrayBuffer(44 + dataLength);
    var view = new DataView(buffer);

    /* RIFF identifier */
    writeString(view, 0, 'RIFF');
    /* RIFF chunk length */
    view.setUint32(4, 36 + dataLength, true);
    /* RIFF type */
    writeString(view, 8, 'WAVE');
    /* format chunk identifier */
    writeString(view, 12, 'fmt ');
    /* format chunk length */
    view.setUint32(16, 16, true);
    /* sample format (raw) */
    view.setUint16(20, 1, true);
    /* channel count */
    view.setUint16(22, numChannels, true);
    /* sample rate */
    view.setUint32(24, sampleRate, true);
    /* byte rate (sample rate * block align); previously hard-coded as sampleRate * 4,
       which is only right for two channels and doubled the value for mono recordings */
    view.setUint32(28, sampleRate * blockAlign, true);
    /* block align (channel count * bytes per sample) */
    view.setUint16(32, blockAlign, true);
    /* bits per sample */
    view.setUint16(34, 16, true);
    /* data chunk identifier */
    writeString(view, 36, 'data');
    /* data chunk length */
    view.setUint32(40, dataLength, true);

    floatTo16BitPCM(view, 44, samples);
    return view;
}
}, self);
this.worker.postMessage({
command: 'init',
config: {
sampleRate: this.context.sampleRate,
numChannels: this.config.numChannels
}
});
// Routes each worker reply to the callback registered for its command.
// Replies come back in the order the requests were posted, so the OLDEST pending
// callback is taken (shift); taking the newest (pop) handed replies to the wrong
// caller whenever two requests of the same kind were outstanding.
this.worker.onmessage = function (e) {
    var pending = _this.callbacks[e.data.command];
    if (!pending) {
        // No callback queue exists for this command; nothing to deliver.
        return;
    }
    var cb = pending.shift();
    if (typeof cb == 'function') {
        cb(e.data.data);
    }
};
}
// Instance methods (first list) and static methods (second list) of Recorder.
_createClass(Recorder, [{
    key: 'record',
    // Starts capturing: audio-process events are forwarded to the worker from now on.
    value: function record() {
        this.recording = true;
    }
}, {
    key: 'stop',
    // Stops capturing; data recorded so far stays in the worker.
    value: function stop() {
        this.recording = false;
    }
}, {
    key: 'clear',
    // Asks the worker to discard everything recorded so far.
    value: function clear() {
        this.worker.postMessage({ command: 'clear' });
    }
}, {
    key: 'getBuffer',
    // Requests the recorded samples; `cb` (or config.callback) receives the worker's reply.
    // Throws when neither callback is available.
    value: function getBuffer(cb) {
        cb = cb || this.config.callback;
        if (!cb) throw new Error('Callback not set');
        this.callbacks.getBuffer.push(cb);
        this.worker.postMessage({ command: 'getBuffer' });
    }
}, {
    key: 'exportWAV',
    // Requests a WAV Blob of the recording; `cb` (or config.callback) receives it.
    // `mimeType` defaults to config.mimeType. Throws when no callback is available.
    value: function exportWAV(cb, mimeType) {
        mimeType = mimeType || this.config.mimeType;
        cb = cb || this.config.callback;
        if (!cb) throw new Error('Callback not set');
        this.callbacks.exportWAV.push(cb);
        this.worker.postMessage({
            command: 'exportWAV',
            type: mimeType
        });
    }
}], [{
    key: 'forceDownload',
    // Triggers a browser download of `blob`, saved as `filename` (default 'output.wav').
    value: function forceDownload(blob, filename) {
        var urlApi = window.URL || window.webkitURL;
        var url = urlApi.createObjectURL(blob);
        var link = window.document.createElement('a');
        link.href = url;
        link.download = filename || 'output.wav';
        link.style.display = 'none';
        // Use a real click on an attached anchor: the previous synthetic event was built
        // with the deprecated initEvent as a plain Event, which is not guaranteed to
        // trigger the anchor's download behaviour.
        window.document.body.appendChild(link);
        link.click();
        window.document.body.removeChild(link);
        // Release the object URL later so the Blob can be collected; the delay gives the
        // browser time to start the download first.
        setTimeout(function () {
            urlApi.revokeObjectURL(url);
        }, 60000);
    }
}]);
return Recorder;
})();
exports.default = Recorder;
}, { "inline-worker": 3 }], 3: [function (require, module, exports) {
"use strict";
module.exports = require("./inline-worker");
}, { "./inline-worker": 4 }], 4: [function (require, module, exports) {
// inline-worker module: exports InlineWorker, which runs a function either in a real
// Web Worker (when the environment supports it) or emulated on the current thread.
(function (global) {
"use strict";
// Transpiler helper: marks every descriptor in `props` configurable (and writable when it
// has a truthy value) and defines them on the prototype / constructor.
var _createClass = (function () { function defineProperties(target, props) { for (var key in props) { var prop = props[key]; prop.configurable = true; if (prop.value) prop.writable = true; } Object.defineProperties(target, props); } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; })();
// Transpiler helper: throws when the constructor is invoked without `new`.
var _classCallCheck = function (instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } };
// True only when `global` is the window object and URL, Blob and Worker all exist.
var WORKER_ENABLED = !!(global === global.window && global.URL && global.Blob && global.Worker);
var InlineWorker = (function () {
// func: the worker body; self: object used as the worker scope in the fallback path.
function InlineWorker(func, self) {
var _this = this;
_classCallCheck(this, InlineWorker);
if (WORKER_ENABLED) {
// Take the source text between the function's outer braces, expose it through a Blob
// URL and return a real Worker running it. Because an object is returned from the
// constructor, `new InlineWorker(...)` evaluates to that Worker. The function therefore
// runs from its source text alone, without access to variables outside its own body.
var functionBody = func.toString().trim().match(/^function\s*\w*\s*\([\w\s,]*\)\s*{([\w\W]*?)}$/)[1];
var url = global.URL.createObjectURL(new global.Blob([functionBody], { type: "text/javascript" }));
return new global.Worker(url);
}
// Fallback: emulate the worker on the current thread.
this.self = self;
// Messages posted by the worker body are delivered asynchronously to this.onmessage.
this.self.postMessage = function (data) {
setTimeout(function () {
_this.onmessage({ data: data });
}, 0);
};
// Run the worker body asynchronously with `self` as its `this`.
setTimeout(function () {
func.call(self);
}, 0);
}
_createClass(InlineWorker, {
postMessage: {
// Fallback-path postMessage: delivers `data` asynchronously to the handler the worker
// body installed on self.onmessage.
value: function postMessage(data) {
var _this = this;
setTimeout(function () {
_this.self.onmessage({ data: data });
}, 0);
}
}
});
return InlineWorker;
})();
module.exports = InlineWorker;
// Invoke with the first available global object: global, self, window, or an empty object.
}).call(this, typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
}, {}]
}, {}, [1])(1)
});
複製上面的程式碼,引入頁面中
這裡面注意
var audio_context = new AudioContext({ sampleRate: 16000 });//音訊內容物件 這句程式碼就可以了,不要修改,它與上文 Recorder.js 中的邏輯是對應的,這樣取樣出來的音訊檔才符合百度的要求,直接拷貝程式碼就能用。通過語音控制臉部辨識等操作,只需要再寫一下邏輯即可:前端判斷某些關鍵詞是否存在,從而觸發對應的方法。範例程式碼中觸發的是「手勢識別」,並且弄了一個定時器,點選開始錄音,5s 後自動進行語音識別操作,更加智慧一些。
<script type="text/javascript">
// Recorder instance; stays null until microphone access has been granted.
var reco = null;

// Ask for microphone access and hand the resulting stream to create_stream.
// The promise-based navigator.mediaDevices.getUserMedia is preferred; the deprecated
// callback-style variants are only used as a fallback for older browsers.
(function requestMicrophone() {
    function onFailure(err) {
        console.log(err)
    }

    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia({ audio: true }).then(create_stream, onFailure);
        return;
    }

    var legacyGetUserMedia = navigator.getUserMedia ||
        navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia ||
        navigator.msGetUserMedia;
    if (!legacyGetUserMedia) {
        // Previously this case threw a TypeError by calling undefined.
        onFailure(new Error("getUserMedia is not supported by this browser"));
        return;
    }
    legacyGetUserMedia.call(navigator, { audio: true }, create_stream, onFailure);
})();
/**
 * Called with the microphone stream: builds the audio graph and stores a mono Recorder
 * in the page-level `reco` variable.
 */
function create_stream(user_media) {
    // The sample rate is fixed at 16000 here.
    var contextOptions = { sampleRate: 16000 };
    var audio_context = new AudioContext(contextOptions);
    var micSource = audio_context.createMediaStreamSource(user_media);
    var recorderConfig = { numChannels: 1 };
    reco = new Recorder(micSource, recorderConfig);
}
// Handle of the timer that ends the current recording.
var clock = '';

/**
 * Starts recording and schedules ai_reco to run after 5 seconds.
 */
function start_reco() {
    if (!reco) {
        // Microphone access has not been granted (yet), so there is no recorder to start.
        console.log("start_reco: recorder is not ready");
        return;
    }
    // Drop a timer left over from an earlier click so only one is ever pending.
    clearInterval(clock);
    reco.record();
    clock = setInterval(ai_reco, 5000)
    console.log("666")
}
/**
 * Stops the recording, uploads it as a WAV file to /Recorder/RecorderVoice and shows the
 * recognised text in #voiceText. When the text contains "手勢識別" the gesture button
 * (#btn_rcon) is clicked.
 */
function ai_reco() {
    clearInterval(clock);
    if (!reco) {
        // Nothing was recorded because the recorder was never created.
        return;
    }
    reco.stop();
    reco.exportWAV(function (wav_file) {
        console.log(wav_file);
        var formdata = new FormData();
        // Sent as a file field named "audio", matching the server action's parameter.
        formdata.append("audio", wav_file);
        $.ajax({
            url: "/Recorder/RecorderVoice",
            type: 'post',
            // Let the browser build the multipart body and its Content-Type header.
            processData: false,
            contentType: false,
            data: formdata,
            dataType: 'json',
            success: function (jsonStr) {
                var data = JSON.stringify(jsonStr);
                if (data.indexOf("手勢識別") !== -1) {
                    $("#btn_rcon").click();
                }
                // text() instead of html(): recognised speech is plain text, never markup.
                $("#voiceText").text("語音識別結果:" + data);
            },
            error: function (xhr, status, err) {
                // Report failures instead of leaving the previous result on screen.
                console.log("ai_reco failed: " + status, err);
                $("#voiceText").text("語音識別失敗");
            }
        })
    });
    // Queued after the export request, so the worker exports first and then resets.
    reco.clear();
}
</script>
這裡沒啥講的了,注意一下去官方開通相應的語音服務,填入對應的密匙,id等引數,還要開通普通話api
/// <summary>
/// Receives a recorded WAV file, keeps a copy under wwwroot/files and sends the audio to
/// Baidu speech recognition.
/// </summary>
/// <param name="audio">The uploaded recording (form field "audio").</param>
/// <returns>
/// A JSON string with the first recognition result, or "Erro" when the upload is missing
/// or recognition did not succeed.
/// </returns>
public IActionResult RecorderVoice([FromForm] IFormFile audio)
{
    // A request without the file used to crash with a NullReferenceException.
    if (audio == null || audio.Length == 0)
    {
        return Json("Erro");
    }

    string appId = _configuration["BaiduAiConfig:BaiDuLanguage:AppId_Language"];
    string apiKey = _configuration["BaiduAiConfig:BaiDuLanguage:ApiKey_Language"];
    string secertKey = _configuration["BaiduAiConfig:BaiDuLanguage:SecertKey_Language"];
    var client = new Baidu.Aip.Speech.Asr(appId, apiKey, secertKey);
    client.Timeout = 120000; // ms; generous so that longer recordings do not time out

    // Read the upload once into memory instead of writing it to disk and reading it back
    // with a single Read call, which is not guaranteed to fill the buffer.
    byte[] arr;
    using (var uploaded = new MemoryStream())
    {
        audio.CopyTo(uploaded);
        arr = uploaded.ToArray();
    }

    // Keep a copy on disk as before. The directory is created when missing, and the full
    // GUID is used because a 6-character prefix can collide.
    Directory.CreateDirectory("wwwroot/files");
    string filename = Path.Combine("wwwroot/files", Guid.NewGuid().ToString("N") + ".wav");
    System.IO.File.WriteAllBytes(filename, arr);

    // Optional parameters: dev_pid selects the recognition model.
    var options = new Dictionary<string, object>
    {
        {"dev_pid", 1537}
    };

    var result = client.Recognize(arr, "wav", 16000, options);

    // Missing fields are treated as a failed recognition instead of throwing.
    bool succeeded = int.TryParse(result?["err_no"]?.ToString(), out int errNo)
        && errNo == 0
        && result["err_msg"]?.ToString() == "success.";
    string text = succeeded ? result["result"]?.First?.ToString() : null;
    if (text != null)
    {
        return Json(text);
    }
    return Json("Erro");
}
}
就到這吧,需要完整可執行程式碼,關注私信吧。。。繼續搬磚了。。。