更新 mini支持讯飞中英文识别大模型,sant增加image对图像处理

This commit is contained in:
dahanzimin
2025-10-27 10:32:44 +08:00
parent f9d3427b20
commit d98b9fb0ed
4 changed files with 465 additions and 425 deletions

View File

@@ -45,7 +45,6 @@ class ASR_WebSocket(Ws_Param):
"domain": "iat",
"language": "zh_cn",
"accent": "mandarin",
"vinfo": 1,
"vad_eos": 1000,
"nbest": 1,
"wbest": 1,
@@ -56,7 +55,10 @@ class ASR_WebSocket(Ws_Param):
self.ws.settimeout(1000)
def _frame(self, status, buf):
    """Build the complete message for one audio frame, ready for json.dumps.

    Callers send the returned dict as-is, so the full envelope has to be
    assembled here rather than only the inner audio record.

    Args:
        status: Frame position marker. The streaming loop uses 0 for the
            first frame, 1 for the following frames and 2 for the final
            frame.
        buf: Raw PCM bytes for this frame; they are base64-encoded into
            the "audio" field.

    Returns:
        dict: For status 0, a message with "common" (app id), "business"
        (recognition parameters) and "data". For any other status, a
        message containing only "data".
    """
    data = {
        "status": status,
        "format": "audio/L16;rate=8000",
        "audio": str(b64encode(buf), 'utf-8'),
        "encoding": "raw",
    }
    # Only the opening frame carries the session parameters. An early
    # unconditional return of the bare data record would leave this
    # branch unreachable and strip the envelope from every frame.
    if status == 0:
        return {"common": {"app_id": self.APPID}, "business": self.business, "data": data}
    return {"data": data}
def on_message(self, message):
result = ""
@@ -98,17 +100,16 @@ class ASR_WebSocket(Ws_Param):
if pace: print('=',end ="")
# 第一帧处理
if _state == 0:
d = {"common": {"app_id": self.APPID}, "business": self.business, "data": self._frame(_state, buf)}
d = self._frame(_state, buf)
_state = 1
# 中间帧处理
else:
d = {"data": self._frame(_state, buf)}
d = self._frame(_state, buf)
self.ws.send(json.dumps(d))
#print("------",len(buf), time.ticks_diff(time.ticks_ms(), _star))
if time.ticks_diff(time.ticks_ms(), _star) > timeout:
raise OSError("Timeout pcm read error")
# 最后一帧处理
d = {"data": self._frame(2, b'\x00')}
d = self._frame(2, b'\x00')
self.ws.send(json.dumps(d))
onboard_bot.pcm_en(False) #PCM关闭
if pace: print(']')
@@ -122,6 +123,45 @@ class ASR_WebSocket(Ws_Param):
else:
print("Run error: %s" % (e))
#中英识别大模型
class IAT_WebSocket(ASR_WebSocket):
    """Speech-recognition client for the Chinese/English large-model endpoint.

    Reuses the connection handling of ASR_WebSocket but replaces the
    request parameters ("slm" domain), the frame layout
    (header / parameter / payload) and the response parsing.
    """

    def __init__(self, APPID, APIKey, APISecret, url='ws://iat.xf-yun.com/v1', accent="mandarin", res_id=None):
        """Set up the client and its recognition parameters.

        Args:
            APPID, APIKey, APISecret: Credentials forwarded to the parent
                constructor.
            url: WebSocket endpoint forwarded to the parent constructor.
            accent: Value stored under "accent" in the request parameters.
            res_id: Value sent as "res_id" in every frame header
                (None is sent unchanged).
        """
        super().__init__(APPID, APIKey, APISecret, url)
        self.res_id = res_id
        # Replaces whatever parameter set the parent constructor installed.
        result_format = {
            "encoding": "utf8",
            "compress": "raw",
            "format": "plain",
        }
        self.business = {
            "domain": "slm",
            "language": "zh_cn",
            "accent": accent,
            "result": result_format,
        }

    def _frame(self, status, buf):
        """Build the message for one audio frame.

        Args:
            status: Frame position marker; 0 marks the opening frame.
            buf: Raw audio bytes, base64-encoded into the payload.

        Returns:
            dict: "header" and "payload" for every frame, plus
            "parameter" (the recognition parameters under "iat") when
            status is 0.
        """
        header = {"status": status, "app_id": self.APPID, "res_id": self.res_id}
        audio = {"audio": str(b64encode(buf), 'utf-8'), "sample_rate": 8000, "encoding": "raw"}
        if status != 0:
            return {"header": header, "payload": {"audio": audio}}
        # Only the opening frame carries the recognition parameters.
        return {"header": header, "parameter": {"iat": self.business}, "payload": {"audio": audio}}

    def on_message(self, message):
        """Decode one server message into recognised text.

        Args:
            message: JSON text received from the server.

        Returns:
            tuple: (text, more). text is the concatenation of every "w"
            entry found in the decoded result ("" when the message has no
            payload). more is False when the header status is 2 and True
            otherwise -- presumably the caller's cue to keep receiving.

        Raises:
            AttributeError: If the header "code" is non-zero; the text
                combines the server's message and the code.
        """
        reply = json.loads(message)
        header = reply['header']
        status_code = header["code"]
        if status_code != 0:
            raise AttributeError("%s Code:%s" % (header["message"], status_code))
        text = ""
        if "payload" in reply:
            # The result text is base64-encoded JSON; words live under "ws" -> "cw" -> "w".
            encoded = reply["payload"]["result"]["text"]
            segments = json.loads(b64decode(encoded).decode())['ws']
            text = "".join(cand["w"] for seg in segments for cand in seg["cw"])
        return text, header["status"] != 2
#大模型
class LLM_WebSocket(Ws_Param):
Model_url = {