Files
gf/encoding/gcharset/gcharset.go

116 lines
3.2 KiB
Go
Raw Normal View History

2021-01-17 21:46:25 +08:00
// Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
//
// This Source Code Form is subject to the terms of the MIT License.
// If a copy of the MIT was not distributed with this file,
// You can obtain one at https://github.com/gogf/gf.

// Package gcharset implements character-set conversion functionality.
//
// Supported Character Set:
//
// Chinese : GBK/GB18030/GB2312/Big5
//
// Japanese: EUCJP/ISO2022JP/ShiftJIS
//
// Korean  : EUCKR
//
// Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE
//
// Other   : macintosh/IBM*/Windows*/ISO-*
package gcharset
import (
2019-06-12 20:49:40 +08:00
"bytes"
"context"
2021-11-13 23:23:55 +08:00
"io/ioutil"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/ianaindex"
"golang.org/x/text/transform"
2021-11-16 19:29:02 +08:00
"github.com/gogf/gf/v2/errors/gcode"
"github.com/gogf/gf/v2/errors/gerror"
"github.com/gogf/gf/v2/internal/intlog"
)
2019-06-12 20:49:40 +08:00
// charsetAlias maps alternative charset spellings to the name that is actually
// used for the encoding lookup. Keys are matched exactly (case-sensitively),
// which is why both upper-case and lower-case spellings are listed.
var charsetAlias = map[string]string{
	"GB2312":   "HZ-GB-2312",
	"gb2312":   "HZ-GB-2312",
	"HZGB2312": "HZ-GB-2312",
	"hzgb2312": "HZ-GB-2312",
}
// Supported returns whether charset `charset` is supported.
2019-06-12 23:50:37 +08:00
func Supported(charset string) bool {
return getEncoding(charset) != nil
}
// Convert converts `src` charset encoding from `srcCharset` to `dstCharset`,
2019-06-12 20:49:40 +08:00
// and returns the converted string.
// It returns `src` as `dst` if it fails converting.
func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
2019-06-12 20:49:40 +08:00
if dstCharset == srcCharset {
return src, nil
}
2019-06-12 23:50:37 +08:00
dst = src
// Converting `src` to UTF-8.
2019-06-12 20:49:40 +08:00
if srcCharset != "UTF-8" {
2019-06-12 23:50:37 +08:00
if e := getEncoding(srcCharset); e != nil {
2019-06-12 20:49:40 +08:00
tmp, err := ioutil.ReadAll(
transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
)
if err != nil {
return "", gerror.Wrapf(err, `convert string "%s" to utf8 failed`, srcCharset)
2019-06-12 20:49:40 +08:00
}
src = string(tmp)
} else {
return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported srcCharset "%s"`, srcCharset)
2019-06-12 20:49:40 +08:00
}
}
// Do the converting from UTF-8 to `dstCharset`.
2019-06-12 20:49:40 +08:00
if dstCharset != "UTF-8" {
2019-06-12 23:50:37 +08:00
if e := getEncoding(dstCharset); e != nil {
2019-06-12 20:49:40 +08:00
tmp, err := ioutil.ReadAll(
transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
)
if err != nil {
return "", gerror.Wrapf(err, `convert string from utf8 to "%s" failed`, dstCharset)
2019-06-12 20:49:40 +08:00
}
dst = string(tmp)
} else {
return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported dstCharset "%s"`, dstCharset)
2019-06-12 20:49:40 +08:00
}
2019-06-12 23:50:37 +08:00
} else {
dst = src
}
return dst, nil
}
// ToUTF8 converts `src` charset encoding from `srcCharset` to UTF-8 ,
2019-06-12 20:49:40 +08:00
// and returns the converted string.
func ToUTF8(srcCharset string, src string) (dst string, err error) {
return Convert("UTF-8", srcCharset, src)
}
// UTF8To converts `src` charset encoding from UTF-8 to `dstCharset`,
2019-06-12 20:49:40 +08:00
// and returns the converted string.
func UTF8To(dstCharset string, src string) (dst string, err error) {
return Convert(dstCharset, "UTF-8", src)
}
// getEncoding returns the encoding.Encoding interface object for `charset`.
// It returns nil if `charset` is not supported.
2019-06-12 23:50:37 +08:00
func getEncoding(charset string) encoding.Encoding {
if c, ok := charsetAlias[charset]; ok {
charset = c
}
enc, err := ianaindex.MIB.Encoding(charset)
if err != nil {
intlog.Errorf(context.TODO(), `%+v`, err)
2019-06-12 23:50:37 +08:00
}
return enc
2019-06-19 09:06:52 +08:00
}