基于GPT-3.5-Instruct编写的JSON翻译
本帖最后由 梦汐 于 2023-9-24 16:18 编辑

```javascript
/**
 * Translate a string or array of strings via the OpenAI completions API.
 *
 * @param {string|string[]} str - Text(s) to translate; a lone string is wrapped in an array.
 * @param {string} [tl] - Target language; defaults to "Chinese" when omitted.
 * @returns {Promise<string>} Resolves with the raw completion text (model is asked to reply with JSON key-value pairs).
 */
function Transl(str, tl) {
    // Bug fix: the original line was truncated ("str = str instanceof Array ? str : str =");
    // wrap a single string so the prompt always serializes a JSON array.
    str = str instanceof Array ? str : [str];
    // Bug fix: "undefined ? 'Chinese' : tl" always evaluated to tl, because the
    // literal `undefined` is falsy — the parameter itself must be tested.
    tl = tl === undefined ? "Chinese" : tl;
    const prompt = `Translate to ${tl}, save as JSON, with translation results as key-value pairs:${JSON.stringify(str, null, 0)}`;
    console.log(prompt);
    return new Promise(
        complete => {
            GM_xmlhttpRequest({
                method: "POST",
                url: "https://填你的代{过}{滤}理域名/v1/completions",
                headers: {
                    "Content-Type": "application/json",
                    "Authorization": "Bearer 填你的key"
                },
                data: JSON.stringify({
                    "model": "gpt-3.5-turbo-instruct",
                    "prompt": prompt,
                    "max_tokens": 1024, // max 4096
                    "temperature": 0.2
                }),
                onload: function (response) {
                    var data = JSON.parse(response.responseText);
                    console.log(data);
                    // Bug fix: `choices` is an array — the completion text
                    // lives on its first element, not on the array object.
                    complete(data.choices[0].text);
                }
            });
        }
    );
}
// Demo: translate a few sample strings to Chinese and log the model's reply.
const samples = ["hi", "save uit", "4u"];
console.log(await Transl(samples, "Chinese"));
```
```python
import json
import openai
import tiktoken
import tkinter as tk
from tkinter import filedialog
class TranslationArray:
    """Translate the keys of a dictionary via the OpenAI completions API.

    Keys are chunked so each request's prompt stays under a token budget,
    sent to ``gpt-3.5-turbo-instruct``, and the per-chunk JSON replies are
    merged into a single result dictionary.
    """

    def __init__(self, base_url: str, api_key: str):
        """Configure the OpenAI client endpoint and credentials."""
        openai.api_base = base_url
        openai.api_key = api_key

    def translate(self, dictionary: dict, target_language: str) -> dict:
        """Translate ``dictionary``'s keys into ``target_language``.

        Returns the merged dict of translation results as parsed from the
        model's JSON replies. ``target_language`` falls back to "Chinese"
        when falsy.
        """

        def get_token_count(text: str) -> int:
            # Count tokens under the same encoding the target model uses.
            encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-instruct")
            return len(encoding.encode(text))

        # Bug fix: the original chunker appended the whole list (`lst`)
        # instead of each element, contained a syntax error on the
        # lookahead line ("... + ],"), computed an unused token count, and
        # was annotated `-> dict` while returning a list.
        def chunk_submit(lst: list, threshold: int = 1200) -> list:
            """Split ``lst`` into chunks whose JSON form stays <= threshold tokens."""
            chunks: list = []
            current: list = []
            for item in lst:
                candidate = current + [item]
                over = get_token_count(
                    json.dumps(candidate, ensure_ascii=False)) > threshold
                if current and over:
                    # Flush before the chunk would exceed the budget.
                    chunks.append(current)
                    current = [item]
                else:
                    current = candidate
            if current:
                chunks.append(current)
            return chunks

        export: dict = {}
        for chunk in chunk_submit(list(dictionary.keys()), 1200):
            body = f"Translate to {target_language or 'Chinese'}, save as JSON, with translation results as key-value pairs:" + \
                json.dumps(chunk)
            # Leave whatever remains of the 4096-token context window
            # for the completion itself.
            tokens = 4096 - get_token_count(body)
            complete = openai.Completion.create(
                model="gpt-3.5-turbo-instruct",
                prompt=body,
                max_tokens=tokens,
                temperature=0.2
            )
            # Bug fix: `choices` is a list — the text is on the first
            # element, not on the list object itself.
            export.update(json.loads(complete["choices"][0]["text"]))
        return export
# --- Script entry: pick a JSON file, translate its keys, write it back. ---
root = tk.Tk()
root.withdraw()  # hide the empty root window; only the dialog is needed
file_path = filedialog.askopenfilename(filetypes=[("JSON Files", "*.json")])

# Load the source dictionary from the chosen file.
with open(file_path, encoding="utf-8") as fp:
    data = json.load(fp)

# Translate everything to Chinese via the API wrapper above.
translator = TranslationArray("your base", "your key")
data = translator.translate(data, "Chinese")

# Overwrite the original file with the translated result.
with open(file_path, "w", encoding="utf-8") as fp:
    json.dump(data, fp, ensure_ascii=False)
print("save", file_path)
```

GPT-3.5的训练数据是2021年9月之前的，总感觉有点生硬。不过还是要支持一下楼主。

感谢分享，这个跟谷歌或者DeepL相比较，会好些吗？
页:
[1]