接着,使用 ERNIE SDK 调用文心大模型能力,获得图像关键信息,生成合乎逻辑、适用于不同年龄段、不同兴趣受众的生动且正能量的精彩故事。
最后,使用长文本语音合成 API 完成对生成故事的语音合成,提供多种语音角色,满足不同受众需求。
总的来说,整个过程分为图片关键信息提取、故事生成和语音合成三个步骤。除此之外,凭借 ERNIE SDK 提供的文生图功能,还可以对用户提供的图片进行二次创作。
-
# Produce a text description of the input image with CLIP Interrogator.
from PIL import Image
from clip_interrogator import Config, Interrogator

image_path = '/home/aistudio/launch/images/test02.jpg'
# Force a 3-channel RGB image regardless of the file's original mode.
image = Image.open(image_path).convert('RGB')
ci = Interrogator(Config(clip_pretrained_model_name_or_path="openai/clip-vit-large-patch14"))
# Interrogate once; the original line issued the identical call twice.
print(ci.interrogate_fast(image))
# Set up a single-turn conversation with the ERNIE SDK.
import os

eb.api_type = 'aistudio'
# NOTE(review): an access token is hard-coded in this snippet. Supply your own
# through the EB_ACCESS_TOKEN environment variable; the literal is kept only as
# a fallback so existing runs behave as before, and it should be rotated and
# removed from source.
eb.access_token = os.environ.get(
    "EB_ACCESS_TOKEN", "ebe9194da1106097f52ada09cb403b91546648ca"
)
# Target audience, interpolated twice into the prompt below.
audience = '成人'
# `result_zh` is the image description; it is defined outside this snippet.
prompt = f"""我给你一个简单的图片说明,请为{audience}观众生成一个与图片非常吻合的虚构故事。请为我生成有创意,正能量并且符合{audience}观众价值观和个人情感的很酷的虚构故事。图片描述如下:'{result_zh}'"""
def generate(prompt):
    """Send ``prompt`` as a single user message and return the reply.

    Args:
        prompt: Text placed in the ``content`` of the one user message.

    Returns:
        The ``result`` attribute of the chat completion response.
    """
    user_turn = {'role': 'user', 'content': prompt}
    response = eb.ChatCompletion.create(model='ernie-4', messages=[user_turn])
    return response.result


# Run the story generation for the prompt built above.
generate(prompt)
import json
from bs4 import BeautifulSoup
import time
def get_access_token(api_key, secrte_key):
    """Generate an auth signature (access token) from an AK/SK pair.

    Args:
        api_key: Application API key, sent as ``client_id``.
        secrte_key: Application secret key, sent as ``client_secret``. The
            misspelled parameter name is kept so keyword callers keep working.

    Returns:
        The access token as a string, or ``None`` when the response contains
        no ``access_token`` field.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": api_key, "client_secret": secrte_key}
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.post(url, params=params, timeout=10)
    token = response.json().get("access_token")
    # Do not blindly wrap in str(): that turned a missing token into the
    # truthy string "None" and hid the failure from callers.
    return None if token is None else str(token)
# Voice display name -> numeric voice id (used as the "voice" request field).
data = {
    "度小宇": 1,
    "度小美": 0,
    "度逍遥(基础)": 3,
    "度丫丫": 4,
    "度逍遥(精品)": 5003,
    "度小鹿": 5118,
    "度博文": 106,
    "度小童": 110,
    "度小萌": 111,
    "度米朵": 103,
    "度小娇": 5,
}


def handle_selection(key):
    """Return the entry of ``data`` for the selected voice name.

    Args:
        key: Value of the selection control; must be a key of ``data``.

    Returns:
        The value stored in ``data`` under ``key``.

    Raises:
        KeyError: If ``key`` is not a known voice name. The message lists
            the valid names.
    """
    try:
        return data[key]
    except KeyError:
        raise KeyError(
            f"unknown voice {key!r}; expected one of {list(data)}"
        ) from None
def get_taskid(api_key, secrte_key, prompt, voice, speed, pitch, volume):
    """Create a text-to-speech task and return its task id.

    Args:
        api_key: API key used to obtain the access token.
        secrte_key: Secret key used to obtain the access token.
        prompt: Text to synthesize.
        voice: Voice display name; resolved through ``handle_selection``.
        speed: Value for the ``speed`` request field.
        pitch: Value for the ``pitch`` request field.
        volume: Value for the ``volume`` request field.

    Returns:
        The ``task_id`` value from the create response.

    Raises:
        KeyError: If ``voice`` is not a known voice name.
        RuntimeError: If the create response contains no ``task_id``
            (previously this surfaced as an uninformative ``KeyError``).
    """
    create_url = "https://aip.baidubce.com/rpc/2.0/tts/v1/create"
    create_payload = json.dumps({
        "text": prompt,                    # text to synthesize
        "format": "wav",                   # audio format
        "voice": handle_selection(voice),  # voice id
        "lang": "zh",                      # language, fixed to zh
        "speed": speed,                    # speaking speed
        "pitch": pitch,                    # pitch
        "volume": volume,                  # volume
        "enable_subtitle": 2,              # subtitle timestamps; allowed values 0, 1, 2
        "break": 5000,                     # gap between paragraphs
    })
    create_headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    # Pass the token via `params` instead of string concatenation so a missing
    # token cannot crash URL construction; the API's own error is reported below.
    create_response = requests.post(
        create_url,
        params={"access_token": get_access_token(api_key, secrte_key)},
        headers=create_headers,
        data=create_payload,
        timeout=30,
    )
    create_data = create_response.json()
    task_id = create_data.get('task_id')
    if task_id is None:
        # Include the response body so the service's error details are visible.
        raise RuntimeError(f"TTS create request returned no task_id: {create_data}")
    return task_id
def get_speechurl(api_key, secrte_key, prompt, voice, speed, pitch, volume,
                  poll_interval=1.0, max_wait=None):
    """Create a synthesis task, poll it until it succeeds, and return the audio URL.

    Args:
        api_key: API key used to obtain the access token.
        secrte_key: Secret key used to obtain the access token.
        prompt: Text to synthesize.
        voice: Voice display name, forwarded to ``get_taskid``.
        speed: Forwarded to ``get_taskid``.
        pitch: Forwarded to ``get_taskid``.
        volume: Forwarded to ``get_taskid``.
        poll_interval: Seconds to sleep between status queries.
        max_wait: Optional upper bound, in seconds, on the total polling
            time. ``None`` (the default) waits indefinitely, as before.

    Returns:
        The ``speech_url`` of the first task in the query response.

    Raises:
        RuntimeError: If the query response has no ``tasks_info`` or the task
            reports the status ``'Failure'``.
        TimeoutError: If ``max_wait`` elapses before the task succeeds.
    """
    query_url = "https://aip.baidubce.com/rpc/2.0/tts/v1/query"
    token = get_access_token(api_key, secrte_key)
    task_id = get_taskid(api_key, secrte_key, prompt, voice, speed, pitch, volume)
    query_payload = json.dumps({
        "task_ids": [
            task_id,  # task_id obtained from the create call
        ]
    })
    query_headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    deadline = None if max_wait is None else time.monotonic() + max_wait

    # Poll the task status until it succeeds, fails, or the deadline passes.
    while True:
        query_response = requests.post(
            query_url,
            params={"access_token": token},
            headers=query_headers,
            data=query_payload,
            timeout=30,
        )
        query_data = query_response.json()
        print(query_data)

        tasks_info = query_data.get('tasks_info')
        if not tasks_info:
            raise RuntimeError(f"TTS query returned no tasks_info: {query_data}")

        task = tasks_info[0]
        status = task.get('task_status')
        if status == 'Success':
            return task['task_result']['speech_url']
        if status == 'Failure':
            # Without this check a failed task would be polled forever.
            raise RuntimeError(f"TTS task {task_id} failed: {task}")
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(f"TTS task {task_id} not finished after {max_wait} seconds")

        # Pause between queries instead of hammering the endpoint.
        time.sleep(poll_interval)
def exchange_speech(api_key, secrte_key, prompt, voice, speed, pitch, volume,
                    filename="output_audio.wav"):
    """Synthesize ``prompt`` to speech, download the audio, and save it locally.

    Args:
        api_key: API key, forwarded to ``get_speechurl``.
        secrte_key: Secret key, forwarded to ``get_speechurl``.
        prompt: Text to synthesize.
        voice: Voice display name, forwarded to ``get_speechurl``.
        speed: Forwarded to ``get_speechurl``.
        pitch: Forwarded to ``get_speechurl``.
        volume: Forwarded to ``get_speechurl``.
        filename: Path the audio is written to; defaults to the previously
            hard-coded ``"output_audio.wav"``.

    Returns:
        The path the audio was written to.
    """
    # Use the URL directly. Embedding it unquoted in an HTML snippet and
    # parsing it back out added nothing and could alter URLs containing
    # "&name" sequences, which the HTML parser treats as character references.
    speech_url = get_speechurl(api_key, secrte_key, prompt, voice, speed, pitch, volume)

    # Download the audio file.
    response = requests.get(speech_url, timeout=60)
    # Fail loudly instead of saving an HTTP error body as a .wav file.
    response.raise_for_status()

    # Save to a local file.
    with open(filename, 'wb') as audio_file:
        audio_file.write(response.content)
    return filename
原标题:《只需三步,开发文心一言应用帮你建立情感纽带!》
来自:微信公众号 百度AI