Skip to content

Commit

Permalink
Upgrade: Batch translation support
Browse files Browse the repository at this point in the history
  • Loading branch information
xgd16 committed Apr 30, 2024
1 parent 9b1e142 commit d15a7fb
Show file tree
Hide file tree
Showing 19 changed files with 408 additions and 219 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@
- 在配置多个 API 时如果调用当前 API 失败自动切换到下一个
- 可以将翻译过的内容写入 `Redis` `Memory` 缓存重复翻译内容降低翻译 API 重复调用

## 批量翻译支持情况

| 平台 | 是否支持批量翻译 | 是否完美支持 | 准确的源语言 | 备注 |
| :-----: | :--------------: | :----------: | :----------: | :----------------------------------------------------------: |
| 百度 | ✅ | ❌ | ❌ | 不支持精确返回具体每条结果的源语言类型 |
| Google | ✅ | ✅ | ✅ | |
| 有道 | ✅ | ❌ | ❌ | 源语言类型识别不准确 |
| 火山 | ✅ | ✅ | ✅ | |
| Deepl | ✅ | ❌ | ❌ | 源语言类型识别不准确 |
| 讯飞 | ✅ | ❌ | ❌ | 官方不支持批量翻译通过特殊字符 № 切割实现 且 可能出现结果非多条 |
| PaPaGo | ✅ | ❌ | ❌ | 基于 \n 切割实现 且不可识别不同的源语言类型 |
| ChatGPT | ✅ | ✅ | ✅ | |

## 未来支持 (优先级按照顺序,打勾为已实现) ✈️
- [x] 持久化已翻译到 `MySQL`
- [x] web 控制页面
Expand Down
13 changes: 13 additions & 0 deletions README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ Optional
- Automatically switches to the next API if the current API call fails when configuring multiple APIs.
- Can write translated content into `Redis` `Memory` cache to reduce repetitive calls to translation APIs.

## Batch Translation Support

| Platform | Batch Translation Support | Perfect Support | Accurate Source Language | Note |
| :------: | :-----------------------: | :-------------: | :----------------------: | :-------------------------------------------------------------------------------------------------------------------------------------- |
| Baidu | Yes | No | No | Does not support accurate return of specific source language for each result |
| Google | Yes | Yes | Yes | |
| Youdao | Yes | No | No | Source language identification is not accurate |
| Huoshan | Yes | Yes | Yes | |
| Deepl | Yes | No | No | Source language identification is not accurate |
| iFly | Yes | No | No | Batch translation is not officially supported; it is emulated by splitting on the special character №, so the result may come back as a single entry instead of one per input |
| PaPaGo | Yes | No | No | Implemented based on \n splitting and cannot recognize different source language types |
| ChatGPT | Yes | Yes | Yes | |

## Future Support (prioritized, checked means implemented) ✈️
- [x] Persist translated content to `MySQL`.
- [x] Web control panel.
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func baseInit() {
// 开启翻译支持
translate.InitTranslate()
// 初始化 chatGPT 需要的数据
global.ChatGPTLangConfig = gjson.MustEncodeString(translate.BaseTranslateConf[translate.ChatGptTranslateMode])
translate.ChatGPTLangConfig = gjson.MustEncodeString(translate.BaseTranslateConf[translate.ChatGptTranslateMode])
// 初始化缓冲区
if err := buffer.Buffer.Init(false); err != nil {
panic(err)
Expand Down
9 changes: 5 additions & 4 deletions src/buffer/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"sync"
"uniTranslate/src/global"
queueHandler "uniTranslate/src/service/queue/handler"
"uniTranslate/src/translate"
"uniTranslate/src/types"

"github.com/gogf/gf/v2/net/ghttp"
Expand Down Expand Up @@ -36,15 +37,15 @@ func (t *BufferType) GetIdx() [][]int {
return t.idx
}

func (t *BufferType) Handler(r *ghttp.Request, from, to, text, platform string, fn func(*types.TranslatePlatform, string, string, string) (*types.TranslateData, error)) (s *types.TranslateData, e error) {
func (t *BufferType) Handler(r *ghttp.Request, req *translate.TranslateReq, fn func(config *types.TranslatePlatform, req *translate.TranslateReq) (*types.TranslateData, error)) (s *types.TranslateData, e error) {
t.m.Lock()
var bufferArr BufferArrInterface
if platform == "" {
if req.Platfrom == "" {
bufferArr = new(RandomSortBufferArr)
} else {
bufferArr = new(PlatformSortBufferArr)
}
bufferArr.Init(t, platform)
bufferArr.Init(t, req.Platfrom)
t.m.Unlock()
// 创建上下文
ctx := gctx.New()
Expand All @@ -61,7 +62,7 @@ func (t *BufferType) Handler(r *ghttp.Request, from, to, text, platform string,
// 释放锁
t.m.Unlock()
// 调用处理
t, err := fn(p, from, to, text)
t, err := fn(p, req)
if err != nil {
e = fmt.Errorf("调用翻译失败 %s", err)
queueHandler.RequestRecordQueue.Push(&types.RequestRecordData{
Expand Down
2 changes: 1 addition & 1 deletion src/global/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ var GfCache *gcache.Cache

var StatisticalProcess types.StatisticsInterface = new(types.MySqlStatistics)

var ChatGPTLangConfig string


// 是否将缓存写入存储
var CacheWriteToStorage = false
Expand Down
2 changes: 1 addition & 1 deletion src/service/cron/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func Service() {
g.Log().Error(ctx, "清理请求记录失败", err)
}
}()
g.Log().Infof(ctx, "每2小时执行一次 清理请求记录")
g.Log().Infof(ctx, "每1小时执行一次 清理请求记录")
if err := clearRequestRecord(ctx); err != nil {
g.Log().Error(ctx, "清理请求记录失败", err)

Expand Down
21 changes: 12 additions & 9 deletions src/service/web/controller/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,19 @@ func Translate(r *ghttp.Request) {
toT := r.Get("to")
textT := r.Get("text")
platform := r.Get("platform").String()
batch := r.Get("batch", false).Bool()
x.FastResp(r, fromT.IsEmpty() || toT.IsEmpty() || textT.IsEmpty(), false).Resp("参数错误")
x.FastResp(r, platform != "" && !xlib.InArr(platform, translate.TranslateModeList), false).Resp("不支持的平台")
from := fromT.String()
to := toT.String()
x.FastResp(r, to == "auto", false).Resp("转换后语言不支持 auto")
text := textT.String()
text := textT.Strings()
// 内容转换为md5
var keyStr string
if global.CachePlatform {
keyStr = fmt.Sprintf("to:%s-text:%s-platform:%s", to, text, platform)
} else {
keyStr = fmt.Sprintf("to:%s-text:%s", to, text)
}
if batch {
keyStr += "-batch"
}
md5 := gmd5.MustEncrypt(keyStr)
// 写入到缓存
var (
Expand All @@ -54,14 +50,21 @@ func Translate(r *ghttp.Request) {
)
// 记录从翻译到获取到结果的时间
startTime := gtime.Now().UnixMilli()
req := &translate.TranslateReq{
From: from,
To: to,
Platfrom: platform,
Text: text,
TextStr: gstr.Join(text, "\n"),
}
// 判断是否进行缓存
if global.CacheMode == "off" {
var dataAny any
dataAny, err = t(r, from, to, text, platform)
dataAny, err = t(r, req)
data = gvar.New(dataAny)
} else {
data, err = global.GfCache.GetOrSetFunc(r.GetCtx(), fmt.Sprintf("Translate:%s", md5), func(ctx context.Context) (value any, err error) {
return t(r, from, to, text, platform)
return t(r, req)
}, 0)
}
endTime := gtime.Now().UnixMilli()
Expand Down Expand Up @@ -132,9 +135,9 @@ func RefreshConfigCache(r *ghttp.Request) {
x.FastResp(r).Resp()
}

func t(r *ghttp.Request, from, to, text, platform string) (value any, err error) {
func t(r *ghttp.Request, req *translate.TranslateReq) (value any, err error) {
var data *types.TranslateData
data, err = buffer.Buffer.Handler(r, from, to, text, platform, handler.Translate)
data, err = buffer.Buffer.Handler(r, req, handler.Translate)
value = data

if data != nil {
Expand Down
18 changes: 8 additions & 10 deletions src/service/web/handler/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,25 @@ import (
)

// Translate resolves the translator for config.Type, runs the request through
// it, and wraps the response in a types.TranslateData.
// An error from GetTranslate or from the translator is returned unchanged and
// data is left nil.
func Translate(config *types.TranslatePlatform, OriginalFrom, OriginalTo, text string) (data *types.TranslateData, err error) {
func Translate(config *types.TranslatePlatform, req *translate.TranslateReq) (data *types.TranslateData, err error) {
// Look up the translation platform implementation.
t, err := translate.GetTranslate(config.Type, config.Cfg)
if err != nil {
return
}
// Run the translation.
translateTextArr, from, err := t.Translate(OriginalFrom, OriginalTo, text)
resp, err := t.Translate(req)
if err != nil {
return
}
// Build the result returned to the caller.
data = &types.TranslateData{
OriginalText: text,
OriginalTextMd5: gmd5.MustEncrypt(text),
TranslateTextArr: translateTextArr,
From: from,
To: OriginalTo,
Platform: config.Type,
OriginalTextLen: gstr.LenRune(text),
TranslationLen: gstr.LenRune(gstr.Join(translateTextArr, "")),
OriginalText: req.TextStr,
OriginalTextMd5: gmd5.MustEncrypt(req.TextStr),
Translate: resp,
To: req.To,
Platform: config.Type,
// Rune count of TextStr with every "\n" stripped out.
OriginalTextLen: gstr.LenRune(gstr.Replace(req.TextStr, "\n", "")),
}
return
}
60 changes: 33 additions & 27 deletions src/translate/baidu.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package translate

import (
"encoding/json"
"errors"
"fmt"
"time"

"github.com/gogf/gf/v2/container/gvar"
"github.com/gogf/gf/v2/crypto/gmd5"
"github.com/gogf/gf/v2/encoding/gjson"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/os/gctx"
"github.com/gogf/gf/v2/os/gtime"
Expand All @@ -21,31 +20,42 @@ type BaiduConfigType struct {
Key string `json:"key"`
}

func (t *BaiduConfigType) Translate(from, to, text string) (result []string, fromLang string, err error) {
// BaiduHTTPTranslateResp is the JSON body that BaiduConfigType.Translate
// decodes from the Baidu endpoint's response.
// From is the single source-language code for the whole response; Translate
// maps it through GetYouDaoLang for every result.
// TransResult carries one entry per translated segment.
type BaiduHTTPTranslateResp struct {
From string `json:"from"`
To string `json:"to"`
TransResult []TransResult `json:"trans_result"`
}

// TransResult is one element of BaiduHTTPTranslateResp.TransResult.
// Dst is copied into TranslateResp.Text by BaiduConfigType.Translate.
// Src is decoded from the "src" key — presumably the original segment; it is
// not read in the code visible here.
type TransResult struct {
Src string `json:"src"`
Dst string `json:"dst"`
}

func (t *BaiduConfigType) Translate(req *TranslateReq) (resp []*TranslateResp, err error) {
if t == nil || t.Url == "" || t.AppId == "" || t.Key == "" {
err = errors.New("百度翻译配置异常")
return
}
mode := t.GetMode()
// 语言标记转换
from, err = SafeLangType(from, mode)
from, err := SafeLangType(req.From, mode)
if err != nil {
return
}
to, err = SafeLangType(to, mode)
to, err := SafeLangType(req.To, mode)
if err != nil {
return
}
salt := gtime.Now().UnixMilli()
signStr := fmt.Sprintf("%s%s%d%s", t.AppId, text, salt, t.Key)
signStr := fmt.Sprintf("%s%s%d%s", t.AppId, req.TextStr, salt, t.Key)
sign, err := gmd5.EncryptString(signStr)
// 处理MD5加密失败
if err != nil {
return
}
// 发起请求
post, err := g.Client().SetTimeout(time.Duration(t.CurlTimeOut)*time.Millisecond).Post(gctx.New(), t.Url, g.Map{
"q": text,
"q": req.TextStr,
"from": from,
"to": to,
"appid": t.AppId,
Expand All @@ -59,34 +69,30 @@ func (t *BaiduConfigType) Translate(from, to, text string) (result []string, fro
// 推出函数时关闭链接
defer func() { _ = post.Close() }()
// 返回的json解析
respStr := post.ReadAllString()
respByte := post.ReadAll()
// 判断状态码
if post.StatusCode != 200 {
err = fmt.Errorf("请求失败 状态码: %d 返回结果: %s", post.StatusCode, respStr)
err = fmt.Errorf("请求失败 状态码: %d 返回结果: %s", post.StatusCode, respByte)
return
}
json, err := gjson.DecodeToJson(respStr)
// 处理json错误
if err != nil {
httpResp := new(BaiduHTTPTranslateResp)
if err = json.Unmarshal(respByte, httpResp); err != nil {
return
}
// 判断获取到的数据是否正常
if json.Get("trans_result").IsEmpty() {
err = fmt.Errorf("请求数据异常 账号: %s 返回结果: %s", t.AppId, respStr)
return
}
// 循环获取数据
var arr []string
for _, v := range json.Get("trans_result").Maps() {
arr = append(arr, gvar.New(v["dst"], true).String())
}

lang, err := GetYouDaoLang(json.Get("from").String(), mode)
if err != nil {
return
resp = make([]*TranslateResp, 0)
for _, item := range httpResp.TransResult {
lang, err1 := GetYouDaoLang(httpResp.From, mode)
if err1 != nil {
err = err1
return
}
resp = append(resp, &TranslateResp{
Text: item.Dst,
FromLang: lang,
})
}

return arr, lang, nil
return
}

func (t *BaiduConfigType) GetMode() string {
Expand Down
48 changes: 36 additions & 12 deletions src/translate/chatGPT.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,68 @@ package translate

import (
"context"
"encoding/json"
"errors"
"fmt"
"uniTranslate/src/global"

"github.com/gogf/gf/v2/container/gvar"
"github.com/sashabaranov/go-openai"
)

// ChatGPTLangConfig is the JSON-encoded language table for the ChatGPT mode.
// It is assigned once at startup from BaseTranslateConf[ChatGptTranslateMode]
// and interpolated into the prompt built by ChatGptConfigType.Translate.
var ChatGPTLangConfig string

// ChatGptConfigType holds the settings for the ChatGPT translation platform.
// Key is passed to SendToChatGpt; Translate rejects an empty Key.
type ChatGptConfigType struct {
Key string `json:"key"`
}

func (t *ChatGptConfigType) Translate(from, to, text string) (result []string, fromLang string, err error) {
// ChatGPTHTTPTranslateResp is the JSON array that Translate decodes from the
// ChatGPT reply.
type ChatGPTHTTPTranslateResp []ChatGPTHTTPTranslateRespElement

// ChatGPTHTTPTranslateRespElement is one entry of the ChatGPT reply array.
// FromLang and Text are copied unchanged into TranslateResp.FromLang and
// TranslateResp.Text.
type ChatGPTHTTPTranslateRespElement struct {
FromLang string `json:"fromLang"`
Text string `json:"text"`
}

// Translate sends req.Text to ChatGPT as a single batch prompt and decodes the
// reply as a JSON array of {fromLang, text} elements, producing one
// TranslateResp per element.
// It returns an error when Key is empty, when SafeLangType rejects a language
// code, when SendToChatGpt fails, or when the reply does not unmarshal into
// ChatGPTHTTPTranslateResp.
func (t *ChatGptConfigType) Translate(req *TranslateReq) (resp []*TranslateResp, err error) {
if t.Key == "" {
return nil, "", errors.New("chatGPT翻译配置异常")
err = errors.New("chatGPT翻译配置异常")
return
}
mode := t.GetMode()
// Convert the language codes for this platform mode.
from, err = SafeLangType(from, mode)
from, err := SafeLangType(req.From, mode)
if err != nil {
return
}
to, err = SafeLangType(to, mode)
to, err := SafeLangType(req.To, mode)
if err != nil {
return
}
// google auto = ""
if from == "auto" {
from = ""
}
result = make([]string, 0)
gptResp, err := SendToChatGpt(t.Key, fmt.Sprintf("将[%s]翻译成%s按照格式{\"fromLang\":\"源语言\",\"text\":\"翻译结果\"}返回给我fromLang有这几种语言直接给我返回对应的key位置%s不需要其他任何回复严格按照我给你的格式翻译结果不要用[]包着", text, to, global.ChatGPTLangConfig))
gptResp, err := SendToChatGpt(t.Key, fmt.Sprintf("接下来模拟你是一个批量翻译接口你将 [%s] 数组数据批量翻译为 %s 返回数据结构为[ [{\"fromLang\":\"源语言\",\"text\":\"翻译结果\"}] ]标准的压缩数组json结构返回给我fromLang有这几种语言直接给我返回对应的key位置%s不需要其他任何回复严格按照我给你的格式翻译结果不要用[]包着", func() (str string) {
// Build a comma-separated list of double-quoted texts for the prompt.
// Each value is inserted verbatim: quotes inside v are not escaped.
for k, v := range req.Text {
if k == 0 {
str = fmt.Sprintf("\"%s\"", v)
} else {
str = fmt.Sprintf("%s,\"%s\"", str, v)
}
}
return
}(), to, ChatGPTLangConfig))
if err != nil {
return
}
respData := gvar.New(gptResp).MapStrVar()
result = append(result, respData["text"].String())
fromLang, err = GetYouDaoLang(respData["fromLang"].String(), mode)
// Decode the reply text as a JSON array of translated elements.
httpResp := new(ChatGPTHTTPTranslateResp)
if err = json.Unmarshal([]byte(gptResp), httpResp); err != nil {
return
}
resp = make([]*TranslateResp, 0)
// FromLang is taken from the reply as-is, with no GetYouDaoLang mapping.
for _, item := range *httpResp {
resp = append(resp, &TranslateResp{
Text: item.Text,
FromLang: item.FromLang,
})
}
return
}

Expand Down
Loading

0 comments on commit d15a7fb

Please sign in to comment.