Complete, working project source code (suitable for further development) for sale. Contact WeChat: zhiweizhiyuan, note: u

ChatGPT API + Azure Speech Studio + UE5 MetaHuman notes and code
A lot of people have asked me to share the code. There really isn't much of it; I gathered snippets that other people had shared around the web and combined them. I'm pasting it all below, so take it and play with it if you're interested.
First, the overall pipeline. I copied this outline from someone else; my own flow is roughly the same, except that I drive the MetaHuman's built-in mouth shapes and expressions. Mine is pure Blueprint, no code.
The rest below was purely padding for the word count.
1. Press Enter, record a few seconds of audio, and send it to Azure to get text.
2. Send the text to ChatGPT to get the AI's reply.
3. Send ChatGPT's reply to Azure to get audio.
4. Use a very simple C# program to read the current system volume and send it to UE5 over UDP.
5. In UE5, receive the UDP data to get a loudness coefficient and apply it to the jawOpen curve in the Animation Blueprint to drive the mouth.
6. Type "quit" to exit.
Steps 1-3 are Python, step 4 is C#, step 5 is UE5.
Python:
import openai
import azure.cognitiveservices.speech as speechsdk
import os

# Text-to-speech config: key and region come from environment variables.
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'),
                                       region=os.environ.get('SPEECH_REGION'))
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)  # defined but not passed below; the synthesizer uses the default speaker anyway
# file_config = speechsdk.audio.AudioOutputConfig(filename="./output.wav")
speech_config.speech_synthesis_voice_name = 'zh-CN-XiaomoNeural'  # overridden by the voice named in the SSML inside speak()
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

# Speech-to-text config for Mandarin recognition.
speech_config2 = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'),
                                        region=os.environ.get('SPEECH_REGION'),
                                        speech_recognition_language="zh-cn")
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config2)

def from_mic(_speech_recognizer):
    # Capture one utterance from the default microphone and return the recognized text.
    result = _speech_recognizer.recognize_once_async().get()
    print(result.text)
    return result.text

openai.api_key = "your ChatGPT API key"
print("chatgpt api test\n")

def chat(prompt):  # wrapped in a function so it can be called repeatedly
    try:
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=prompt,
            temperature=0.9,
            max_tokens=2500,
            top_p=1,
            frequency_penalty=0.0,
            presence_penalty=0.6,
            stop=[" Human:", " AI:"]
        )
        answer = response["choices"][0]["text"].strip()
        return answer
    except Exception as exc:
        # print(exc)  # enable this if you need the failure reason; leave it off for a cleaner console
        return "broken"
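# Note: text-davinci-003 has since been retired by OpenAI. If the Completion call
# above stops working, a minimal alternative using the chat completions endpoint
# looks like this (my sketch, assuming the same pre-1.0 "openai" package imported
# above; chat_v2 is an illustrative name, not the original author's):
def chat_v2(prompt):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.9,
            max_tokens=2500,
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception:
        return "broken"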
def speak(_speech_synthesizer, _text):
    if _text == "":
        return
    # Replies are expected in the form "AI: ..."; keep only the part after the colon.
    strArr = _text.split(":", 1)
    print(strArr)
    if len(strArr) < 2:
        return
    s = strArr[1]
    # Wrap the reply in SSML so the voice, speaking style, and rate can be controlled.
    text = """
    <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="zh-CN">
    <voice name="zh-CN-XiaoxiaoNeural">
    <mstts:express-as style="chat" role="YoungAdultFemale">
    <prosody rate="+12.00%">
    """ + s
    text += """
    </prosody>
    </mstts:express-as>
    </voice>
    </speak>"""
    result = _speech_synthesizer.speak_ssml_async(ssml=text).get()

text = ""  # running conversation context resubmitted with each question
turns = []  # list of past questions and answers (the dialogue turns)
while True:  # keep asking questions until "quit"
    question = input()
    if len(question.strip()) == 0:  # empty input: take the question from the microphone instead
        # print("please input your question")
        question = from_mic(speech_recognizer)
    if question == "quit":  # typing "quit" ends the program
        print("\nAI: 再见!")  # "Goodbye!"
        speak(speech_synthesizer, "AI: 再见!")
        break
    else:
        prompt = text + "\nHuman: " + question
        result = chat(prompt)
        while result == "broken":  # on failure, resubmit the same question until an answer comes back
            print("please wait...")
            result = chat(prompt)  # resubmit the question
        else:
            turns += [question] + [result]  # accumulating turns is what lets follow-up questions keep their context
            print(result)
            print("===================\n\n\n")
            speak(speech_synthesizer, result)
            # speakResult = speech_synthesizer.speak_text_async(strArr[1]).get()
        if len(turns) <= 10:  # cap the resubmitted context at the last 10 entries so the prompt stays under the token limit
            text = " ".join(turns)
        else:
            text = " ".join(turns[-10:])
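To run the Python part on its own you only need the two SDKs imported at the top; assuming pip, something like "pip install openai==0.28 azure-cognitiveservices-speech" should do. The openai package is pinned below 1.0 here because the script uses the old openai.Completion interface.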
Parts of this code were adapted from another web page; thanks to its author.
os.environ.get('SPEECH_KEY'): the key is stored in a system environment variable.
os.environ.get('SPEECH_REGION'): you can also hard-code the values as strings, but that is less secure and less convenient when several code files need them.
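If you're not sure the variables are set, a tiny check like the following fails fast with a readable error (require_env is my own illustrative helper, not part of the original script):

import os

def require_env(name):
    # Illustrative helper: read an environment variable and fail fast if it is missing.
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"environment variable {name} is not set")
    return value

speech_key = require_env('SPEECH_KEY')        # on Windows, e.g.: setx SPEECH_KEY your-key
speech_region = require_env('SPEECH_REGION')  # e.g. eastasia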
The C# part just reads the system master volume to make the mouth move, which is pretty crude. A lot of people are doing audio2face these days, and that looks much better, but it's not my area and I had nothing ready to hand. What I did have was a simple program I'd written before for reading the system volume, so I used it as-is.
using NAudio.CoreAudioApi;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using CpLib;
using Newtonsoft.Json;

namespace audioPoint
{
    public partial class AudioPoint : Form
    {
        static public Net.UDP udp;
        static public string ipPort = "127.0.0.1:4600";

        public AudioPoint()
        {
            InitializeComponent();
            init();
        }

        private void init()
        {
            // Enumerate active audio endpoints and default to the first one.
            MMDeviceEnumerator enumerator = new MMDeviceEnumerator();
            var devices = enumerator.EnumerateAudioEndPoints(DataFlow.All, DeviceState.Active);
            audioDevicecomboBox1.Items.AddRange(devices.ToArray());
            audioDevicecomboBox1.SelectedItem = devices.ToArray()[0];
            udp = new Net.UDP("audioPoint udp", 0);
            Net.SendWorker.Start();
        }

        private void timer1_Tick(object sender, EventArgs e)
        {
            if (udp != null)
            {
                udp.Update();
            }
            if (audioDevicecomboBox1.SelectedItem != null)
            {
                var device = (MMDevice)audioDevicecomboBox1.SelectedItem;
                progressBar1.Value = (int)(Math.Round(device.AudioMeterInformation.MasterPeakValue * 100 + 0.5));
                // Send the raw peak value (0.0-1.0) to UE over UDP.
                // CpLib is a small helper library I wrote earlier to speed up development;
                // any plain UDP send code will do the same job here.
                udp.SendToIpPort(ipPort, device.AudioMeterInformation.MasterPeakValue.ToString());
            }
        }

        private void AudioPoint_FormClosing(object sender, FormClosingEventArgs e)
        {
            if (udp != null)
            {
                udp.Close();
            }
            Net.SendWorker.Close();
        }

        private void ipPortTextBox1_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode != Keys.Enter) return;
            ipPort = ipPortTextBox1.Text;
        }
    }
}
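If you want to test the receiving side without building the C# app, a rough stand-in in Python works too. This is my sketch, assuming the same plain-text protocol as above (one float formatted as a string per datagram, sent to 127.0.0.1:4600):

import math
import socket
import time

# Stand-in for the C# sender: pushes a fake, slowly pulsing "volume" value
# (0.0-1.0) to the port the UE5 code listens on.
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
t = 0.0
while True:
    volume = (math.sin(t) + 1.0) / 2.0  # fake volume between 0 and 1
    sock.sendto(str(volume).encode('utf-8'), ('127.0.0.1', 4600))
    t += 0.1
    time.sleep(1 / 30)  # roughly the rate of the WinForms timer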
For the UE5 part, I adapted something I'd built earlier, so there's some unrelated code in here. Honestly, I'd suggest looking for one of the UE5 UDP plugins that expose UDP directly to Blueprints; that's simpler, and there's no need to wrestle with this code, which just listens on a UDP port and reads the data.
AIVision.h
// Fill out your copyright notice in the Description page of Project Settings.

#pragma once

#include "CoreMinimal.h"
#include "Common/UdpSocketBuilder.h"
#include "Common/UdpSocketReceiver.h"
#include "Kismet/BlueprintFunctionLibrary.h"
#include "AIVision.generated.h"

class CppUdp
{
public:
    CppUdp(int _port = 0, FString name = TEXT("CppUdp"));
    ~CppUdp();
    int port = 0;
    TSharedPtr<FInternetAddr> RemoteAddr;
    FIPv4Endpoint remotePoint = FIPv4Endpoint();
    void send(FString str);
    void update();
    void close();
    FSocket* udpSocket = nullptr;
private:
    FUdpSocketReceiver* udpReceiver = nullptr;
    FCriticalSection m_mutex;
    void OnUdpReceiver(const FArrayReaderPtr& readerPtr, const FIPv4Endpoint& endPoint);
};

/**
 *
 */
UCLASS()
class CPPAUDIO2FACE_API UAIVision : public UBlueprintFunctionLibrary
{
    GENERATED_BODY()
public:
    UFUNCTION(BlueprintCallable, Category=CppAi)
    static void start();
    UFUNCTION(BlueprintCallable, Category=CppAi)
    static void close();
    UFUNCTION(BlueprintCallable, Category=CppAi)
    static void getFacePos(TArray<float>& data);
    UFUNCTION(BlueprintCallable, Category=CppAi)
    static float getAudioVal(); // read, from Blueprint, the system volume (0.0-1.0) received over UDP
    static void readStr(const FString str);
private:
    static CppUdp* cppUdp;
    static TArray<float> raw;
    static FCriticalSection m_mutex;
    static float audioVal;
};
AIVision.cpp
// Fill out your copyright notice in the Description page of Project Settings.

#include "AIVision.h"

CppUdp* UAIVision::cppUdp = nullptr;
TArray<float> UAIVision::raw = TArray<float>();
FCriticalSection UAIVision::m_mutex;
float UAIVision::audioVal = 0;

CppUdp::CppUdp(int _port, FString name)
{
    port = _port;
    port = 4600; // the port is hard-coded to 4600 here, matching the C# sender
    FUdpSocketBuilder* builder = new FUdpSocketBuilder(TEXT("CppUdp"));
    // Enable broadcast.
    builder->WithBroadcast();
    FIPv4Address addrIp;
    FIPv4Endpoint bindEndpoint;
    // Parse the string address into an address object.
    bool b = FIPv4Address::Parse(TEXT("127.0.0.1"), addrIp);
    bindEndpoint.Address = addrIp;
    bindEndpoint.Port = 4600;
    // Bind the port so we can receive messages.
    builder->BoundToEndpoint(bindEndpoint);
    udpSocket = builder->Build();
    if (udpSocket == nullptr)
    {
        //Log::w("CppUdp bind fail, port=" + 0);
        return;
    }
    // The original code passed FTimespan(1 / 120), which is integer division and
    // yields 0 ticks; use an explicit ~8 ms wait instead.
    FTimespan waitTime = FTimespan::FromMilliseconds(1000.0 / 120.0);
    if (udpSocket)
    {
        udpReceiver = new FUdpSocketReceiver(udpSocket, waitTime, TEXT("AOctLiveClient"));
        // Bind the receive callback.
        udpReceiver->OnDataReceived().BindRaw(this, &CppUdp::OnUdpReceiver);
        udpReceiver->Start();
    }
    FIPv4Address centerAddrIp;
    b = FIPv4Address::Parse(TEXT("127.0.0.1"), centerAddrIp);
    RemoteAddr = ISocketSubsystem::Get(PLATFORM_SOCKETSUBSYSTEM)->CreateInternetAddr();
    RemoteAddr->SetIp(centerAddrIp.Value);
    RemoteAddr->SetPort(4700);
}

CppUdp::~CppUdp()
{
}

void CppUdp::send(FString str)
{
    if (udpSocket == nullptr) return;
}

void CppUdp::update()
{
}

void CppUdp::close()
{
    m_mutex.Lock();
    if (udpReceiver != nullptr)
    {
        udpReceiver->Stop();
        //udpReceiver->Exit();
        udpReceiver = nullptr;
    }
    if (udpSocket != nullptr)
    {
        udpSocket->Close();
        udpSocket = nullptr;
    }
    m_mutex.Unlock();
}

void CppUdp::OnUdpReceiver(const FArrayReaderPtr& readerPtr, const FIPv4Endpoint& endPoint)
{
    // Copy the datagram into a zero-terminated buffer and convert it to an FString.
    int size = readerPtr->Num() + 1;
    uint8* data2 = new uint8[size];
    FMemory::Memzero(data2, size);
    FMemory::Memcpy(data2, readerPtr->GetData(), readerPtr->Num());
    const FString str = UTF8_TO_TCHAR(reinterpret_cast<const char*>(data2));
    UAIVision::readStr(str);
    delete[] data2;
    remotePoint = endPoint;
}

void UAIVision::start()
{
    cppUdp = new CppUdp(4600);
}

void UAIVision::close()
{
    if (cppUdp != nullptr)
    {
        cppUdp->close();
        delete cppUdp;
        cppUdp = nullptr; // avoid a dangling pointer if start() is called again
    }
}

void UAIVision::getFacePos(TArray<float>& data)
{
    m_mutex.Lock();
    data.Empty();
    for (auto f : raw)
    {
        data.Add(f);
    }
    m_mutex.Unlock();
}

float UAIVision::getAudioVal()
{
    float v = 0;
    m_mutex.Lock();
    v = audioVal;
    m_mutex.Unlock();
    return v;
}

void UAIVision::readStr(const FString str)
{
    // The C# side sends the volume as a plain string; parse it to a float under the lock.
    m_mutex.Lock();
    audioVal = FCString::Atof(*str);
    m_mutex.Unlock();
}
In the Animation Blueprint, read the volume value with getAudioVal.
A Modify Curve node then applies the volume value to jawOpen, the mouth-open amount. The effect is hilariously crude, hahaha.
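One note from me, not the original project: the raw peak value jumps around a lot, so driving jawOpen with it directly makes the jaw flutter. Smoothing it first helps; in the Animation Blueprint a FInterpTo node does this, and the underlying math is just exponential smoothing, sketched here in Python:

def smooth_jaw(prev, target, dt, speed=10.0):
    # Exponential smoothing: move prev toward target at a rate proportional to
    # the gap. Equivalent in spirit to UE's FInterpTo; "speed" is an
    # illustrative parameter you would tune by eye.
    return prev + (target - prev) * min(1.0, speed * dt)

# Example: a jumpy volume reading settles instead of snapping.
jaw = 0.0
for volume in [0.9, 0.1, 0.8, 0.05]:
    jaw = smooth_jaw(jaw, volume, dt=1 / 30)
    print(round(jaw, 3))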
Summary: honestly, what makes the result look good is mainly the UE5 MetaHuman assets and the facial animation from the City Sample crowds, which make the character feel alive. The rest is just shuttling data between APIs, nothing very sophisticated; I'm publishing it for reference. The Python part runs on its own as a voice-chat feature, while the C# and UE5 parts just use the system volume for a fake little performance. A very, very rough, thrown-together patchwork.