diff --git a/g/encoding/gcharset/gcharset.go b/g/encoding/gcharset/gcharset.go
index e235a765b..39e8ad3d9 100644
--- a/g/encoding/gcharset/gcharset.go
+++ b/g/encoding/gcharset/gcharset.go
@@ -9,24 +9,59 @@
 // Package gcharset provides converting string to requested character encoding.
 //
-// 字符集转换方法,
-// 使用mahonia实现的字符集转换方法,支持的字符集包括常见的utf8/UTF-16/UTF-16LE/macintosh/big5/gbk/gb18030,支持的全量字符集可以参考mahonia包
+// Charsets registered in encodingMap are converted with the vendored
+// golang.org/x/text encodings; any other charset name is resolved through mahonia.
 package gcharset
 
 import (
 	"bytes"
 	"errors"
 	"fmt"
+	"github.com/gogf/gf/g/container/gmap"
 	"github.com/gogf/gf/third/github.com/axgle/mahonia"
+	"github.com/gogf/gf/third/golang.org/x/text/encoding"
+	"github.com/gogf/gf/third/golang.org/x/text/encoding/japanese"
+	"github.com/gogf/gf/third/golang.org/x/text/encoding/korean"
 	"github.com/gogf/gf/third/golang.org/x/text/encoding/simplifiedchinese"
+	"github.com/gogf/gf/third/golang.org/x/text/encoding/traditionalchinese"
 	"github.com/gogf/gf/third/golang.org/x/text/transform"
 	"io/ioutil"
 	"strings"
 )
 
+// encodingMap maps upper-case charset names to the x/text encoding used for
+// them. Convert consults it before falling back to mahonia.
+//
+// NOTE(review): "GB2312" is registered as an alias of the HZGB2312 encoding;
+// confirm that this alias is intended.
+var encodingMap *gmap.Map
+
+// init fills encodingMap with the charsets that are handled by x/text.
+func init() {
+	encodingMap = gmap.New()
+	encodingMap.Sets(
+		map[interface{}]interface{}{
+			"GBK":       simplifiedchinese.GBK,
+			"GB18030":   simplifiedchinese.GB18030,
+			"HZGB2312":  simplifiedchinese.HZGB2312,
+			"GB2312":    simplifiedchinese.HZGB2312,
+			"EUCJP":     japanese.EUCJP,
+			"ISO2022JP": japanese.ISO2022JP,
+			"SHIFTJIS":  japanese.ShiftJIS,
+			"EUCKR":     korean.EUCKR,
+			"BIG5":      traditionalchinese.Big5,
+		})
+}
+
-// 2个字符集之间的转换
+// Convert converts src from the srcCharset encoding to the dstCharset encoding
+// and returns the converted string. Charset names are upper-cased before they
+// are compared or looked up. An error is returned when a charset is found in
+// neither encodingMap nor mahonia, or when an x/text transcoding step fails.
 func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
+	// Normalize once; every comparison and lookup below uses these names.
+	srcCharsetUpper := strings.ToUpper(srcCharset)
+	dstCharsetUpper := strings.ToUpper(dstCharset)
 
-	if strings.EqualFold(srcCharset, dstCharset) {
+	if srcCharsetUpper == dstCharsetUpper {
 		return src, nil
 	}
 
@@ -34,69 +69,51 @@ func Convert(dstCharset string, srcCharset string, src string) (dst string, err
 	d := new(mahonia.Charset)
 	srctmp := src
 
-	switch {
-	case strings.EqualFold("GBK", srcCharset):
-		tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), simplifiedchinese.GBK.NewDecoder()))
-		if err != nil {
-			return "", fmt.Errorf("gbk to utf8 failed. %v", err)
-		}
-		srctmp = string(tmp)
-	case strings.EqualFold("GB18030", srcCharset):
-		tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), simplifiedchinese.GB18030.NewDecoder()))
-		if err != nil {
-			return "", fmt.Errorf("GB18030 to utf8 failed. %v", err)
-		}
-		srctmp = string(tmp)
-	case strings.EqualFold("GB2312", srcCharset) || strings.EqualFold("HZGB2312", srcCharset):
-		tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), simplifiedchinese.HZGB2312.NewDecoder()))
-		if err != nil {
-			return "", fmt.Errorf("GB2312 to utf8 failed. %v", err)
-		}
-		srctmp = string(tmp)
-	case strings.EqualFold("UTF-8", srcCharset):
-	default:
-		s = mahonia.GetCharset(srcCharset)
-		if s == nil {
-			return "", errors.New(fmt.Sprintf("not support charset:%s", srcCharset))
-		}
+	// Decode src to UTF-8 first, unless it is already UTF-8.
+	if srcCharsetUpper != "UTF-8" {
+		// encodingMap keys are upper case, so look up the normalized name.
+		enc := encodingMap.Get(srcCharsetUpper)
+		if enc != nil {
+			tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), enc.(encoding.Encoding).NewDecoder()))
+			if err != nil {
+				return "", fmt.Errorf("%s to utf8 failed. %v", srcCharset, err)
+			}
+			srctmp = string(tmp)
+		} else {
+			// Not registered in encodingMap: fall back to mahonia.
+			s = mahonia.GetCharset(srcCharsetUpper)
+			if s == nil {
+				return "", errors.New(fmt.Sprintf("not support charset:%s", srcCharset))
+			}
 
-		if s.Name != "UTF-8" {
-			srctmp = s.NewDecoder().ConvertString(srctmp)
+			if s.Name != "UTF-8" {
+				srctmp = s.NewDecoder().ConvertString(srctmp)
+			}
 		}
 	}
 
 	dst = srctmp
-	switch {
-	case strings.EqualFold("GBK", dstCharset):
-		tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), simplifiedchinese.GBK.NewEncoder()))
-		if err != nil {
-			return "", fmt.Errorf("utf to gbk failed. %v", err)
-		}
-		dst = string(tmp)
-	case strings.EqualFold("GB18030", dstCharset):
-		tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), simplifiedchinese.GB18030.NewEncoder()))
-		if err != nil {
-			return "", fmt.Errorf("utf8 to gb18030 failed. %v", err)
-		}
-		dst = string(tmp)
-	case strings.EqualFold("GB2312", dstCharset) || strings.EqualFold("HZGB2312", dstCharset):
-		tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), simplifiedchinese.HZGB2312.NewEncoder()))
-		if err != nil {
-			return "", fmt.Errorf("utf8 to gb2312 failed. %v", err)
-		}
-		dst = string(tmp)
-	case strings.EqualFold("UTF-8", dstCharset):
-	default:
-		d = mahonia.GetCharset(dstCharset)
-		if d == nil {
-			return "", errors.New(fmt.Sprintf("not support charset:%s", dstCharset))
-		}
+	// Then encode the UTF-8 text to dstCharset, unless UTF-8 was requested.
+	if dstCharsetUpper != "UTF-8" {
+		enc := encodingMap.Get(dstCharsetUpper)
+		if enc != nil {
+			tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), enc.(encoding.Encoding).NewEncoder()))
+			if err != nil {
+				return "", fmt.Errorf("utf to %s failed. %v", dstCharset, err)
+			}
+			dst = string(tmp)
+		} else {
+			// Not registered in encodingMap: fall back to mahonia.
+			d = mahonia.GetCharset(dstCharsetUpper)
+			if d == nil {
+				return "", errors.New(fmt.Sprintf("not support charset:%s", dstCharset))
+			}
 
-		dst = srctmp
-		if d.Name != "UTF-8" {
-			dst = d.NewEncoder().ConvertString(dst)
+			dst = srctmp
+			if d.Name != "UTF-8" {
+				dst = d.NewEncoder().ConvertString(dst)
+			}
 		}
 	}
-
 	return dst, nil
 }