The complete, working project source code (ready for further development) is for sale. Contact WeChat: zhiweizhiyuan, note: u
Reference material on the ChatGPT API + Azure Speech Studio + UE5 MetaHuman.
A lot of people have asked me to share the code. There honestly isn't much of it; I gathered snippets other people had shared and stitched them together. I'm pasting all of it below, so take it and play with it if you're interested.
First, the overall workflow. I copied this description from someone else; my flow is roughly the same, except I drive the MetaHuman's built-in lip-sync and facial expressions, and my own version is all Blueprints, no code.
(Everything below is honestly just me padding the word count.)
1. Press Enter, record a few seconds of audio, and send it to Azure to get the recognized text.
2. Send that text to the ChatGPT API to get the AI's reply.
3. Send the ChatGPT reply to Azure to synthesize speech.
4. A very simple C# program reads the current system audio level and sends it to UE5 over UDP.
5. UE5 receives the UDP data, takes the loudness coefficient, and applies it to the jawOpen curve in the Animation Blueprint to drive the mouth.
6. Type "quit" to exit.
Steps 1-3 are Python, step 4 is C#, and step 5 is UE5.
python:
import openai
import azure.cognitiveservices.speech as speechsdk
import os

speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
# file_config = speechsdk.audio.AudioOutputConfig(filename="./output.wav")
speech_config.speech_synthesis_voice_name = 'zh-CN-XiaomoNeural'
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

speech_config2 = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'), speech_recognition_language="zh-cn")
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config2)


def from_mic(_speech_recognizer):
    result = _speech_recognizer.recognize_once_async().get()
    print(result.text)
    return result.text


openai.api_key = "your chatgpt api key"
print("chatgpt api test\n")


def chat(prompt):  # wrapped in a function so it can be called repeatedly
    try:
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=prompt,
            temperature=0.9,
            max_tokens=2500,
            top_p=1,
            frequency_penalty=0.0,
            presence_penalty=0.6,
            stop=[" Human:", " AI:"]
        )
        answer = response["choices"][0]["text"].strip()
        return answer
    except Exception as exc:
        # print(exc)  # uncomment this line if you want to see the failure reason
        return "broken"


def speak(_speech_synthesizer, _text):
    if _text == "":
        return
    strArr = _text.split(":", 1)
    print(strArr)
    if len(strArr) < 2:
        return
    s = strArr[1]
    text = """<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="zh-CN">
        <voice name="zh-CN-XiaoxiaoNeural">
            <mstts:express-as style="chat" role="YoungAdultFemale">
                <prosody rate="+12.00%">
    """ + s
    text += """
                </prosody>
            </mstts:express-as>
        </voice>
    </speak>"""
    result = _speech_synthesizer.speak_ssml_async(ssml=text).get()


text = ""    # the conversation context that gets resubmitted with each question
turns = []   # a "turn" is one question/answer round of the dialogue
while True:  # keep asking questions in a loop
    question = input()
    if len(question.strip()) == 0:  # empty input: capture the question from the microphone instead
        # print("please input your question")
        question = from_mic(speech_recognizer)
    if question == "quit":  # typing "quit" ends the program
        print("\nAI: 再见!")
        speak(speech_synthesizer, "AI: 再见!")
        break
    else:
        prompt = text + "\nHuman: " + question
        result = chat(prompt)
        while result == "broken":  # if the request fails, resubmit the same question until it succeeds
            print("please wait...")
            result = chat(prompt)
        else:
            turns += [question] + [result]  # accumulating turns is what lets follow-up questions use the context
            print(result)
            print("===================\n\n\n")
            speak(speech_synthesizer, result)
            # speakResult = speech_synthesizer.speak_text_async(strArr[1]).get()
            if len(turns) <= 10:  # cap the submitted context at the last 10 entries so it doesn't exceed the token limit
                text = " ".join(turns)
            else:
                text = " ".join(turns[-10:])

Parts of this code were adapted from the page below, with thanks:
https://cloud.tencent.com/developer/article/2179399?areaSource=&traceId=
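One caveat if you try to run this today: the text-davinci-003 completion model used in chat() has since been retired by OpenAI, so the call may just keep returning "broken". As a rough sketch (not part of the original script), a chat-endpoint variant of chat() using the same 0.x openai Python SDK could look like the following; the model name is only an example, and the rest of the script can stay unchanged:

def chat(prompt):
    """Chat-endpoint variant of chat(); returns "broken" on any failure, like the original."""
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # example model; use whatever your key can access
            messages=[{"role": "user", "content": prompt}],
            temperature=0.9,
            max_tokens=2500,
            presence_penalty=0.6,
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception:
        return "broken"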
os.environ.get('SPEECH_KEY') reads the key from a system environment variable, and os.environ.get('SPEECH_REGION') does the same for the region. You could also hard-code the strings directly in the script, but that is less secure and less convenient when several code files need the same values.
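As a minimal sketch (not from the original post), here is one way to sanity-check that the two variables are actually visible to the script before the Azure calls fail; the setx lines in the comment are just the usual way to set them on Windows, and the region string is only an example:

import os

# On Windows these can be set once from a terminal, for example:
#   setx SPEECH_KEY "your-azure-speech-key"
#   setx SPEECH_REGION "eastasia"
# (newly opened terminals/processes pick them up; already-open ones do not)

speech_key = os.environ.get("SPEECH_KEY")
speech_region = os.environ.get("SPEECH_REGION")
if not speech_key or not speech_region:
    raise RuntimeError("SPEECH_KEY / SPEECH_REGION are not set; "
                       "set the environment variables or hard-code them at your own risk")
print("Using Azure Speech region:", speech_region)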
The C# part just reads the system master volume to make the mouth move, which is very crude. Many people are doing Audio2Face now and that looks much better, but that is not my area and I had nothing of the sort on hand. What I did have was a simple system-volume monitor I wrote a while ago, so I reused it as-is.
using NAudio.CoreAudioApi;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using CpLib;
using Newtonsoft.Json;

namespace audioPoint
{
    public partial class AudioPoint : Form
    {
        static public Net.UDP udp;
        static public string ipPort = "127.0.0.1:4600";

        public AudioPoint()
        {
            InitializeComponent();
            init();
        }

        private void init()
        {
            MMDeviceEnumerator enumerator = new MMDeviceEnumerator();
            var devices = enumerator.EnumerateAudioEndPoints(DataFlow.All, DeviceState.Active);
            audioDevicecomboBox1.Items.AddRange(devices.ToArray());
            audioDevicecomboBox1.SelectedItem = devices.ToArray()[0];
            udp = new Net.UDP("audioPoint udp", 0);
            Net.SendWorker.Start();
        }

        private void timer1_Tick(object sender, EventArgs e)
        {
            if (udp != null)
            {
                udp.Update();
            }
            if (audioDevicecomboBox1.SelectedItem != null)
            {
                var device = (MMDevice)audioDevicecomboBox1.SelectedItem;
                // show the current peak level on the progress bar (0-100)
                progressBar1.Value = (int)(Math.Round(device.AudioMeterInformation.MasterPeakValue * 100 + 0.5));
                // Send this volume float to UE over UDP.
                // CpLib is a small helper library I wrote earlier to speed up development;
                // any code that can send a UDP datagram will do the job here.
                udp.SendToIpPort(ipPort, device.AudioMeterInformation.MasterPeakValue.ToString());
            }
        }

        private void AudioPoint_FormClosing(object sender, FormClosingEventArgs e)
        {
            if (udp != null)
            {
                udp.Close();
            }
            Net.SendWorker.Close();
        }

        private void ipPortTextBox1_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode != Keys.Enter) return;
            ipPort = ipPortTextBox1.Text;
        }
    }
}

For the UE5 part: I adapted it from something I already had, so there is a bit of unrelated code in it. Honestly, I'd suggest looking for one of the UE5 UDP plugins that expose UDP directly as Blueprint nodes; that is simpler, and there is no need to bother with this code, which just listens on a UDP port and reads the data.
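Whichever route you take on the UE side (the C++ below or a UDP Blueprint plugin), you can test the listener without running the C# tool by pushing fake loudness values at it from Python. This is only a sketch, but it follows the conventions already used here: plain-text floats sent over UDP to 127.0.0.1:4600.

import math
import socket
import time

# Sends a fake, slowly oscillating "loudness" value (0.0-1.0) to the UE listener on port 4600,
# in the same plain-text float format the C# tool uses.
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
target = ("127.0.0.1", 4600)

t = 0.0
while True:
    value = (math.sin(t) + 1.0) / 2.0            # fake loudness between 0 and 1
    sock.sendto(str(value).encode("utf-8"), target)
    t += 0.2
    time.sleep(1.0 / 30)                          # roughly 30 updates per second

If the Animation Blueprint is wired up as described further below, the jaw should pulse open and closed as the value oscillates.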
AIVision.h
// Fill out your copyright notice in the Description page of Project Settings.
#pragma once
#include "CoreMinimal.h"
#include "Common/UdpSocketBuilder.h"
#include "Common/UdpSocketReceiver.h"
#include "Kismet/BlueprintFunctionLibrary.h"
#include "AIVision.generated.h"

class CppUdp
{
public:
    CppUdp(int _port = 0, FString name = "CppUdp");
    ~CppUdp();

    int port = 0;
    TSharedPtr<FInternetAddr> RemoteAddr;
    FIPv4Endpoint remotePoint = FIPv4Endpoint();

    void send(FString str);
    void update();
    void close();

    FSocket* udpSocket = nullptr;

private:
    FUdpSocketReceiver* udpReceiver = nullptr;
    FCriticalSection m_mutex;
    void OnUdpReceiver(const FArrayReaderPtr& readerPtr, const FIPv4Endpoint& endPoint);
};

/**
 *
 */
UCLASS()
class CPPAUDIO2FACE_API UAIVision : public UBlueprintFunctionLibrary
{
    GENERATED_BODY()

public:
    UFUNCTION(BlueprintCallable, Category = CppAi)
    static void start();

    UFUNCTION(BlueprintCallable, Category = CppAi)
    static void close();

    UFUNCTION(BlueprintCallable, Category = CppAi)
    static void getFacePos(TArray<float>& data);

    UFUNCTION(BlueprintCallable, Category = CppAi)
    static float getAudioVal(); // read the system volume value (0.0-1.0) received over UDP, for use in Blueprints

    static void readStr(const FString str);

private:
    static CppUdp* cppUdp;
    static TArray<float> raw;
    static FCriticalSection m_mutex;
    static float audioVal;
};

AIVision.cpp
// Fill out your copyright notice in the Description page of Project Settings.
#include "AIVision.h"
CppUdp* UAIVision::cppUdp = nullptr;
TArray<float> UAIVision::raw = TArray<float>();
FCriticalSection UAIVision::m_mutex;
float UAIVision::audioVal = 0;

CppUdp::CppUdp(int _port, FString name)
{
    port = _port;
    port = 4600; // hard-coded to 4600, overriding the argument

    FUdpSocketBuilder* builder = new FUdpSocketBuilder(TEXT("CppUdp"));
    // enable broadcast
    builder->WithBroadcast();

    FIPv4Address addrIp;
    FIPv4Endpoint bindEndpoint;
    // parse the string address into an object
    bool b = FIPv4Address::Parse(TEXT("127.0.0.1"), addrIp);
    bindEndpoint.Address = addrIp;
    bindEndpoint.Port = 4600;

    // bind to a port so we can receive messages
    builder->BoundToEndpoint(bindEndpoint);
    udpSocket = builder->Build();
    if (udpSocket == nullptr)
    {
        //Log::w("CppUdp bind fail ,port= " + 0);
        return;
    }

    FTimespan waitTime = FTimespan::FromSeconds(1.0 / 120); // the original FTimespan(1 / 120) is integer division, i.e. a zero-tick wait
    if (udpSocket)
    {
        udpReceiver = new FUdpSocketReceiver(udpSocket, waitTime, TEXT("AOctLiveClient"));
        // bind the receive callback
        udpReceiver->OnDataReceived().BindRaw(this, &CppUdp::OnUdpReceiver);
        udpReceiver->Start();
    }

    FIPv4Address centerAddrIp;
    b = FIPv4Address::Parse(TEXT("127.0.0.1"), centerAddrIp);
    RemoteAddr = ISocketSubsystem::Get(PLATFORM_SOCKETSUBSYSTEM)->CreateInternetAddr();
    RemoteAddr->SetIp(centerAddrIp.Value);
    RemoteAddr->SetPort(4700);
}

CppUdp::~CppUdp()
{
}

void CppUdp::send(FString str)
{
    if (udpSocket == nullptr) return;
}

void CppUdp::update()
{
}

void CppUdp::close()
{
    m_mutex.Lock();
    if (udpReceiver != nullptr)
    {
        udpReceiver->Stop();
        //udpReceiver->Exit();
        udpReceiver = nullptr;
    }
    if (udpSocket != nullptr)
    {
        udpSocket->Close();
        udpSocket = nullptr;
    }
    m_mutex.Unlock();
}

void CppUdp::OnUdpReceiver(const FArrayReaderPtr& readerPtr, const FIPv4Endpoint& endPoint)
{
    // copy the datagram into a zero-terminated buffer and parse it as a UTF-8 string
    int size = readerPtr->Num() + 1;
    uint8* data2 = new uint8[size];
    FMemory::Memzero(data2, size);
    FMemory::Memcpy(data2, readerPtr->GetData(), readerPtr->Num());
    const FString str = UTF8_TO_TCHAR(reinterpret_cast<const char*>(data2));
    UAIVision::readStr(str);
    delete[] data2;
    remotePoint = endPoint;
}

void UAIVision::start()
{
    cppUdp = new CppUdp(4600);
}

void UAIVision::close()
{
    if (cppUdp != nullptr)
    {
        cppUdp->close();
        delete cppUdp;
        cppUdp = nullptr;
    }
}

void UAIVision::getFacePos(TArray<float>& data)
{
    m_mutex.Lock();
    data.Empty();
    for (auto f : raw)
    {
        data.Add(f);
    }
    m_mutex.Unlock();
}

float UAIVision::getAudioVal()
{
    float v = 0;
    m_mutex.Lock();
    v = audioVal;
    m_mutex.Unlock();
    return v;
}

void UAIVision::readStr(const FString str)
{
    m_mutex.Lock();
    audioVal = FCString::Atof(*str);
    m_mutex.Unlock();
}

Getting the volume value in the Animation Blueprint:
A Modify Curve node uses the volume value to set jawOpen, which controls how wide the mouth opens. The result is hilariously crude, hahaha.
Summary: honestly, what makes the result look good is mostly UE5's MetaHuman assets and the facial animations from the city-crowd assets, which make the character feel alive. Everything else is just shuffling data between APIs, nothing sophisticated; I'm sharing it purely as a reference. The Python part runs on its own as a voice-chat feature, while the C# and UE5 parts just use the system volume to fake the mouth performance. It's a very rough, thrown-together hack.