【azure-openai】批量翻译demo【python】【gradio】
要求:拥有azure-openai-api,上传文件为csv格式,utf-8编码。
注意:如果打开结果文件时出现乱码,可以重新运行,或换一种方式(例如换一个编辑器)打开;WPS 有时会自动更改文件的编码。
实现功能:选择目标语言,使用 gpt-4o-mini(模型可改)翻译 CSV 中指定的某一列,并把翻译后的内容写入指定的另一列;翻译后的文件可在浏览器中下载,适合用来验证翻译质量。
页面展示:
代码:
handlergpt.py
from openai.lib.azure import AzureOpenAI
# Placeholder key kept so the module-level name still exists; the function
# below passes its own api_key to the client and does not read this variable.
api_key = "sk-...."


def get_response_gpt(sys_msg, input_new):
    """Send one system/user message pair to Azure OpenAI and return the reply.

    Args:
        sys_msg: Text sent as the ``system`` message.
        input_new: Text sent as the ``user`` message.

    Returns:
        The content of the first returned choice with surrounding whitespace
        stripped, or an empty string when that choice carries no text content.
    """
    client = AzureOpenAI(
        azure_endpoint="https://....openai.azure.com/",
        # The elided values must be obtained from the Azure portal.
        api_key="....",
        api_version="2024-06-01",
    )
    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": input_new},
    ]
    response = client.chat.completions.create(
        model="4omini",
        messages=messages,
    )
    # The message content is optional in the response; calling .strip() on a
    # missing value would raise AttributeError, so fall back to "".
    content = response.choices[0].message.content
    return content.strip() if content else ""
def bai2en(Input, language):
    """Translate ``Input`` into ``language`` via ``get_response_gpt``.

    Args:
        Input: The text to translate. A falsy value (empty string, ``None``)
            is not sent for translation.
        language: Name of the target language, interpolated into the system
            prompt.

    Returns:
        The reply from ``get_response_gpt`` converted with ``str``, or the
        fixed message "文本有误,重新输入" when ``Input`` is falsy.
    """
    # NOTE: some adjacent sentences are joined without a separating space;
    # the prompt text is reproduced exactly as it was.
    prompt_parts = [
        f"The current language of translation is <{language}>. ",
        "Translate only the text while maintaining the original punctuation. ",
        "Output only the translated text without any additional explanations. ",
        "''contains the content you want to translate.",
        "Only output the content in ''. Any other explanatory statements are prohibited.",
        "Here is the text to be translated:\n",
    ]
    system_prompt = "".join(prompt_parts)

    if not Input:
        return "文本有误,重新输入"
    return str(get_response_gpt(system_prompt, Input))
def handler(input, language):
    """Translate ``input`` into ``language`` and wrap the result in a dict.

    Args:
        input: The text to translate; forwarded unchanged to ``bai2en``.
        language: Name of the target language; forwarded to ``bai2en``.

    Returns:
        A dict with the single key ``"Output"`` holding what ``bai2en``
        returned.
    """
    return {"Output": bai2en(input, language)}
if __name__ == '__main__':
    # Manual check: read one line from stdin and print its translation.
    # bai2en requires a target language as its second argument; calling it
    # with the text alone raises TypeError, so English is passed here.
    Input = input()
    print(bai2en(Input, "English"))
main.py
import os
import csv
import gradio as gr
from handlergpt import handler
# Batch translation, with no delay between requests.
def process_csv_file(csv_filepath, output_filepath, language, source_column, target_column):
    """Translate one column of a CSV file and write the result to a new CSV.

    The first row is treated as the header. For every following row the text
    in ``source_column`` is passed through ``format_body`` and ``handler``,
    and the translation is stored in ``target_column``. Rows whose source
    cell is empty after stripping are copied without calling ``handler``.
    Rows and the header are padded with empty cells so both columns exist,
    and the header cell of the target column is set to ``language``.

    Args:
        csv_filepath: Path of the UTF-8 input CSV (a leading BOM is accepted).
        output_filepath: Path the translated CSV is written to (UTF-8).
        language: Target language; passed to ``handler`` and used as the
            target column's header.
        source_column: 1-based index of the column to translate.
        target_column: 1-based index of the column receiving translations.

    Returns:
        The progress log, one message per line, ending with the completion
        message.

    Raises:
        ValueError: If ``source_column`` or ``target_column`` is below 1.
    """
    # Columns are 1-based; 0 or a negative value would otherwise silently
    # index from the end of each row.
    if source_column < 1 or target_column < 1:
        raise ValueError("source_column and target_column are 1-based and must be >= 1")

    output_log = []  # Collected progress messages.
    translated_rows = []
    max_column = max(source_column, target_column)
    source_index = source_column - 1
    target_index = target_column - 1

    # utf-8-sig reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise end up inside the first header cell. newline='' lets the csv
    # module handle line endings inside quoted fields itself.
    with open(csv_filepath, 'r', newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.reader(csvfile)
        # An empty file has no header row; treat it as an empty header
        # instead of letting StopIteration escape.
        headers = next(reader, None)
        if headers is None:
            headers = []
        # Make sure the header row is long enough for both columns.
        if len(headers) < max_column:
            headers.extend([''] * (max_column - len(headers)))
        # The target column is labelled with the language name.
        headers[target_index] = language

        for row_count, row in enumerate(reader, start=1):
            # Pad short rows so both column indexes are valid.
            if len(row) < max_column:
                row.extend([''] * (max_column - len(row)))
            body = row[source_index].strip()
            if not body:
                # Keep the row as-is when there is nothing to translate.
                translated_rows.append(row)
                log_message = f"Row {row_count}: Empty body in source column {source_column}, skipped translation."
                output_log.append(log_message)
                print(log_message)
                continue

            formatted_body = format_body(body)
            translated_body = handler(formatted_body, language)['Output']
            row[target_index] = translated_body
            translated_rows.append(row)

            # Report progress with a short preview of both texts.
            log_message = f"Row {row_count}: Translated body from '{body[:30]}...' to '{translated_body[:30]}...'"
            output_log.append(log_message)
            print(log_message)

    # Write the modified header followed by every processed row.
    with open(output_filepath, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(headers)
        writer.writerows(translated_rows)

    completion_message = f"Translation complete. File saved to {output_filepath}"
    output_log.append(completion_message)
    print(completion_message)
    return "\n".join(output_log)
def format_body(body):
    """Return ``body`` unchanged.

    Placeholder hook for formatting the source text before translation; it
    currently applies no transformation.
    """
    return body
# Gradio UI callback.
def translate_file(csv_file, language, source_column, target_column):
    """Translate an uploaded CSV and return the path of the result file.

    Args:
        csv_file: The upload from the file input. Either a path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute is the
            path of the uploaded file; ``None`` when nothing was uploaded.
        language: Target language passed on to ``process_csv_file``.
        source_column: 1-based source column number; converted with ``int``.
        target_column: 1-based target column number; converted with ``int``.

    Returns:
        The path of the translated CSV, or the message
        "Please upload a CSV file." when ``csv_file`` is ``None``.
    """
    if csv_file is None:
        # NOTE(review): the interface in this file wires this return value to
        # a gr.File output, and a message string is not a file path —
        # consider raising gr.Error here instead; confirm against callers.
        return "Please upload a CSV file."

    # The upload may arrive as a plain path or as a file-like wrapper that
    # exposes the path through ``.name``; accept both.
    if isinstance(csv_file, (str, os.PathLike)):
        csv_filepath = os.fspath(csv_file)
    else:
        csv_filepath = csv_file.name

    # The result is written to a fixed file name in the current working
    # directory, so every call overwrites the previous result.
    output_filepath = os.path.join(os.getcwd(), "自定义路径.csv")

    process_csv_file(csv_filepath, output_filepath, language, int(source_column), int(target_column))

    # Hand the generated file's path back so it can be offered for download.
    return output_filepath
# Build and launch the Gradio interface.
def main():
    """Create the CSV translation web UI and start serving it.

    The interface takes an uploaded CSV file, a target language and the
    1-based source/target column numbers, calls ``translate_file`` and
    offers the returned file for download.
    """
    # More languages can be added; no special identifier is required as long
    # as the model understands the language name.
    supported_languages = [
        "English", "Chinese", "Spanish", "Portuguese", "French", "German",
        "Italian", "Japanese", "Korean", "Hindi", "Bengali", "Russian",
        "Indonesian", "Vietnamese", "Dutch", "Hungarian", "Turkish", "Polish",
    ]
    language_picker = gr.Dropdown(choices=supported_languages, label="Target Language")
    upload_box = gr.File(label="CSV File to Translate")
    source_picker = gr.Number(
        label="Source Column (e.g., 1 for first column)",
        value=1,
        precision=0,
    )
    target_picker = gr.Number(
        label="Target Column (e.g., 3 for third column)",
        value=3,
        precision=0,
    )

    # The output is a file component so the translated CSV can be downloaded.
    download_box = gr.File(label="Download Translated CSV")

    app = gr.Interface(
        fn=translate_file,
        inputs=[upload_box, language_picker, source_picker, target_picker],
        outputs=download_box,
        title="CSV Translation Tool",
        description="Upload a CSV file, choose a target language, specify source and target columns, and download the translated CSV.",
    )

    # Serve on the LAN. Both values are redacted placeholders: the address
    # must be replaced with a real host address and ``xxxx`` (not defined
    # anywhere in the visible code) with a real port number before running.
    app.launch(server_name="192.xxx.xx.xx", server_port=xxxx)
# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    main()