mirror of
https://gitee.com/johng/gf
synced 2026-06-06 02:25:47 +08:00
使用encoding库做字符集转换
This commit is contained in:
@ -9,24 +9,48 @@
|
||||
// Package gcharset provides converting string to requested character encoding.
|
||||
//
|
||||
// Character set conversion functions.
|
||||
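// Charsets registered in encodingMap are converted with golang.org/x/text; all others fall back to mahonia. Supported charsets include the common utf8/UTF-16/UTF-16LE/macintosh/big5/gbk/gb18030; see the mahonia package for its full list.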
|
||||
package gcharset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/gogf/gf/g/container/gmap"
|
||||
"github.com/gogf/gf/third/github.com/axgle/mahonia"
|
||||
"github.com/gogf/gf/third/golang.org/x/text/encoding"
|
||||
"github.com/gogf/gf/third/golang.org/x/text/encoding/japanese"
|
||||
"github.com/gogf/gf/third/golang.org/x/text/encoding/korean"
|
||||
"github.com/gogf/gf/third/golang.org/x/text/encoding/simplifiedchinese"
|
||||
"github.com/gogf/gf/third/golang.org/x/text/encoding/traditionalchinese"
|
||||
"github.com/gogf/gf/third/golang.org/x/text/transform"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// encodingMap maps upper-case charset names to golang.org/x/text encodings.
// It is populated once in init and read by Convert.
var encodingMap *gmap.Map
|
||||
|
||||
func init() {
|
||||
encodingMap = gmap.New()
|
||||
encodingMap.Sets(
|
||||
map[interface{}]interface{}{
|
||||
"GBK": simplifiedchinese.GBK,
|
||||
"GB18030": simplifiedchinese.GB18030,
|
||||
"HZGB2312": simplifiedchinese.HZGB2312,
|
||||
"GB2312": simplifiedchinese.HZGB2312,
|
||||
"EUCJP": japanese.EUCJP,
|
||||
"ISO2022JP": japanese.ISO2022JP,
|
||||
"SHIFTJIS": japanese.ShiftJIS,
|
||||
"EUCKR": korean.EUCKR,
|
||||
"BIG5": traditionalchinese.Big5,
|
||||
})
|
||||
}
|
||||
|
||||
// Convert converts src from the srcCharset encoding to the dstCharset encoding.
|
||||
func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
|
||||
srcCharsetUpper := strings.ToUpper(srcCharset)
|
||||
dstCharsetUpper := strings.ToUpper(dstCharset)
|
||||
|
||||
if strings.EqualFold(srcCharset, dstCharset) {
|
||||
if srcCharsetUpper == dstCharsetUpper {
|
||||
return src, nil
|
||||
}
|
||||
|
||||
@ -34,72 +58,48 @@ func Convert(dstCharset string, srcCharset string, src string) (dst string, err
|
||||
d := new(mahonia.Charset)
|
||||
srctmp := src
|
||||
|
||||
switch {
|
||||
case strings.EqualFold("GBK", srcCharset):
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), simplifiedchinese.GBK.NewDecoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("gbk to utf8 failed. %v", err)
|
||||
}
|
||||
srctmp = string(tmp)
|
||||
case strings.EqualFold("GB18030", srcCharset):
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), simplifiedchinese.GB18030.NewDecoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("GB18030 to utf8 failed. %v", err)
|
||||
}
|
||||
srctmp = string(tmp)
|
||||
case strings.EqualFold("GB2312", srcCharset) || strings.EqualFold("HZGB2312", srcCharset):
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), simplifiedchinese.HZGB2312.NewDecoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("GB2312 to utf8 failed. %v", err)
|
||||
}
|
||||
srctmp = string(tmp)
|
||||
case strings.EqualFold("UTF-8", srcCharset):
|
||||
default:
|
||||
s = mahonia.GetCharset(srcCharset)
|
||||
if s == nil {
|
||||
return "", errors.New(fmt.Sprintf("not support charset:%s", srcCharset))
|
||||
}
|
||||
if srcCharset != "UTF-8" {
|
||||
enc := encodingMap.Get(srcCharset)
|
||||
if enc != nil {
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(src)), enc.(encoding.Encoding).NewDecoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("%s to utf8 failed. %v", srcCharset, err)
|
||||
}
|
||||
srctmp = string(tmp)
|
||||
} else {
|
||||
s = mahonia.GetCharset(srcCharsetUpper)
|
||||
if s == nil {
|
||||
return "", errors.New(fmt.Sprintf("not support charset:%s", srcCharset))
|
||||
}
|
||||
|
||||
if s.Name != "UTF-8" {
|
||||
srctmp = s.NewDecoder().ConvertString(srctmp)
|
||||
if s.Name != "UTF-8" {
|
||||
srctmp = s.NewDecoder().ConvertString(srctmp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dst = srctmp
|
||||
|
||||
switch {
|
||||
case strings.EqualFold("GBK", dstCharset):
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), simplifiedchinese.GBK.NewEncoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("utf to gbk failed. %v", err)
|
||||
}
|
||||
dst = string(tmp)
|
||||
case strings.EqualFold("GB18030", dstCharset):
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), simplifiedchinese.GB18030.NewEncoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("utf8 to gb18030 failed. %v", err)
|
||||
}
|
||||
dst = string(tmp)
|
||||
case strings.EqualFold("GB2312", dstCharset) || strings.EqualFold("HZGB2312", dstCharset):
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), simplifiedchinese.HZGB2312.NewEncoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("utf8 to gb2312 failed. %v", err)
|
||||
}
|
||||
dst = string(tmp)
|
||||
case strings.EqualFold("UTF-8", dstCharset):
|
||||
default:
|
||||
d = mahonia.GetCharset(dstCharset)
|
||||
if d == nil {
|
||||
return "", errors.New(fmt.Sprintf("not support charset:%s", dstCharset))
|
||||
}
|
||||
if dstCharset != "UTF-8" {
|
||||
enc := encodingMap.Get(dstCharset)
|
||||
if enc != nil {
|
||||
tmp, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(srctmp)), enc.(encoding.Encoding).NewEncoder()))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("utf to %s failed. %v", dstCharset, err)
|
||||
}
|
||||
dst = string(tmp)
|
||||
} else {
|
||||
d = mahonia.GetCharset(dstCharsetUpper)
|
||||
if d == nil {
|
||||
return "", errors.New(fmt.Sprintf("not support charset:%s", dstCharset))
|
||||
}
|
||||
|
||||
dst = srctmp
|
||||
if d.Name != "UTF-8" {
|
||||
dst = d.NewEncoder().ConvertString(dst)
|
||||
dst = srctmp
|
||||
if d.Name != "UTF-8" {
|
||||
dst = d.NewEncoder().ConvertString(dst)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user