基于GPT-3.5-Instruct编写的JSON翻译

梦汐发表于 2023-9-24 16:16

本帖最后由梦汐于 2023-9-24 16:18 编辑

```javascript

/**
 * Translate one string or a list of strings with the OpenAI completions API.
 *
 * The strings are serialised as a JSON array and appended to a fixed prompt
 * that asks the model to answer with a JSON object of key-value pairs.
 *
 * @param {string|string[]} str  Text to translate; a single string is wrapped
 *                               into a one-element array.
 * @param {string} [tl]          Target language; defaults to "Chinese" when
 *                               omitted.
 * @returns {Promise<string>}    Resolves with the raw completion text of the
 *                               first choice. Rejects on a network error,
 *                               timeout, unparsable response, or a response
 *                               without any choices.
 */
function Transl(str, tl) {
   // Always send an array so the prompt has the same shape for one or many strings.
   const items = Array.isArray(str) ? str : [str];
   // Compare against undefined explicitly; only a missing argument gets the default.
   const target = tl === undefined ? "Chinese" : tl;
   const prompt = `Translate to ${target}, save as JSON, with translation results as key-value pairs:${JSON.stringify(items, null, 0)}`;
   console.log(prompt);

   return new Promise((complete, fail) => {
      GM_xmlhttpRequest({
         method: "POST",
         url: "https://填你的代理域名/v1/completions",
         headers: {
            "Content-Type": "application/json",
            "Authorization": "Bearer 填你的key"
         },
         data: JSON.stringify({
            "model": "gpt-3.5-turbo-instruct",
            "prompt": prompt,
            "max_tokens": 1024,//max 4096
            "temperature": 0.2
         }),
         onload: function (response) {
            try {
               const data = JSON.parse(response.responseText);
               console.log(data);
               // `choices` is an array; the completion text lives on its first entry.
               if (!data.choices || data.choices.length === 0) {
                  fail(new Error("Completion response contains no choices: " + response.responseText));
                  return;
               }
               complete(data.choices[0].text);
            } catch (err) {
               // Settle the promise instead of leaving the caller waiting forever.
               fail(err);
            }
         },
         onerror: fail,
         ontimeout: fail
      });
   });
}
// Demo: translate a small batch of English phrases and print the raw model output.
let sl = ["hi", "save uit", "4u"];
const translated = await Transl(sl, "Chinese");
console.log(translated);
```
```python
import json
import openai
import tiktoken
import tkinter as tk
from tkinter import filedialog

class TranslationArray:
    """Translate the keys of a dictionary through the OpenAI completions API.

    The keys are grouped into chunks that stay within a token budget, each
    chunk is sent as one completion request, and the JSON objects returned by
    the model are merged into a single result dictionary.
    """

    MODEL = "gpt-3.5-turbo-instruct"
    # Combined prompt + completion token budget requested per call.
    CONTEXT_WINDOW = 4096

    def __init__(self, base_url: str, api_key: str):
        """Configure the module-level OpenAI client.

        Args:
            base_url: Value assigned to ``openai.api_base``.
            api_key: Value assigned to ``openai.api_key``.
        """
        openai.api_base = base_url
        openai.api_key = api_key

    @staticmethod
    def _chunk_by_tokens(items: list, threshold: int, count_tokens) -> list:
        """Split *items* into consecutive chunks that fit a token budget.

        Args:
            items: Values to distribute, in order.
            threshold: Maximum token count of a chunk's JSON serialisation.
            count_tokens: Callable mapping a string to its token count.

        Returns:
            A list of lists. Every item appears exactly once and order is
            preserved. An item that exceeds the threshold on its own still
            gets a chunk of its own, so nothing is dropped.
        """
        chunks = []
        current = []
        for item in items:
            candidate = current + [item]
            serialised = json.dumps(candidate, ensure_ascii=False)
            # Close the running chunk only if it already holds something;
            # otherwise an oversized single item would produce an empty chunk.
            if current and count_tokens(serialised) > threshold:
                chunks.append(current)
                current = [item]
            else:
                current = candidate
        if current:
            chunks.append(current)
        return chunks

    def translate(self, dictionary: dict, target_language: str) -> dict:
        """Translate the keys of *dictionary* into *target_language*.

        Args:
            dictionary: Mapping whose keys are the texts to translate; the
                values are not sent to the model.
            target_language: Language name inserted into the prompt. A falsy
                value falls back to ``"Chinese"``.

        Returns:
            The merged JSON objects returned by the model for all chunks.

        Raises:
            json.JSONDecodeError: If a completion is not valid JSON.
        """
        # Look the tokenizer up once instead of on every count.
        encoding = tiktoken.encoding_for_model(self.MODEL)

        def get_token_count(text: str) -> int:
            return len(encoding.encode(text))

        chunks = self._chunk_by_tokens(list(dictionary), 1200, get_token_count)
        export = {}
        for chunk in chunks:
            # Serialise without ASCII escaping, matching how the chunks were
            # sized, so non-ASCII keys reach the model as readable text.
            body = f"Translate to {target_language or 'Chinese'}, save as JSON, with translation results as key-value pairs:" + \
                json.dumps(chunk, ensure_ascii=False)
            # Give the completion whatever the prompt leaves of the budget.
            tokens = self.CONTEXT_WINDOW - get_token_count(body)
            complete = openai.Completion.create(
                model=self.MODEL,
                prompt=body,
                max_tokens=tokens,
                temperature=0.2
            )
            # "choices" is a list; the completion text is on its first entry.
            export.update(json.loads(complete["choices"][0]["text"]))
        return export

# Select the JSON file to be translated
root = tk.Tk()
root.withdraw()  # hide the empty root window so only the file dialog shows
file_path = filedialog.askopenfilename(filetypes=[("JSON Files", "*.json")])
root.destroy()

# The dialog yields an empty value when cancelled; stop here instead of
# failing later inside open().
if not file_path:
    raise SystemExit("No JSON file selected")

# Read the data from the JSON file
with open(file_path, encoding="utf-8") as f:
    data = json.load(f)

# Translate the keys of the loaded data to Chinese
translator = TranslationArray("your base", "your key")
data = translator.translate(data, "Chinese")

# Write the translation result to the original file
with open(file_path, "w", encoding="utf-8") as outfile:
    outfile.write(json.dumps(data, ensure_ascii=False))

print("save", file_path)
```

marco527 发表于 2023-9-24 16:30

GPT-3.5的训练数据截止到2021年9月，翻译出来总感觉有点生硬。不过还是要支持一下楼主。

waxxy 发表于 2023-9-24 23:21

感谢分享，这个跟谷歌翻译或是DeepL相比，会好些吗？

页: [1]

吾爱破解 - 52pojie.cn's Archiver

基于GPT-3.5-Instruct编写的JSON翻译