refactor(encoding/gjson): enhance auto type checks when loading data without type specified (#4637)

This pull request improves YAML support for i18n translation files and
refactors content type detection and loading logic in the `gjson`
package. The main changes include more robust detection of YAML, TOML,
INI, and Properties formats, refactoring of content type handling, and
the addition of new tests to ensure correct parsing of YAML-based i18n
resources.

### Improved content type detection and loading

* Refactored content type detection logic in `gjson` to use dedicated
functions for XML, YAML, TOML, INI, and Properties formats, making the
detection more reliable and maintainable.
* Changed the content loading mechanism in `gjson` to use specific
decode functions (`gxml.Decode`, `gyaml.Decode`, etc.) for each format
instead of converting everything to JSON first, improving accuracy and
extensibility.
* Updated type definitions and struct field comments in `gjson.go` for
clarity and consistency, including changing `ContentType` to a type
alias and improving documentation.
[[1]](diffhunk://#diff-0e4432d7e4cf171c0339e01b1842530432b986948d7f839a155543623236a03fL24-R24)
[[2]](diffhunk://#diff-0e4432d7e4cf171c0339e01b1842530432b986948d7f839a155543623236a03fL38-R71)

### i18n YAML support

* Modified i18n manager to use the new `gjson.LoadPath` method for
loading translation files, ensuring correct parsing of YAML files for
i18n.
* Added new test cases and test data for loading and verifying YAML i18n
files, including edge cases and real-world translation strings.
[[1]](diffhunk://#diff-e6eacc5abab33c149f9b39d8ebe300cf4d0abe907434605991984a5969e8707dR262-R283)
[[2]](diffhunk://#diff-1bfd438797c1f9ef18ab3cb00d23ae95202e85e2362c39c3df4f1a29c55733feR421-R430)
[[3]](diffhunk://#diff-a3ee37ff2a67c9e1ba2e1617e0f5fd63eb261ad7760a07423f703538138c2decR1-R16)

### Minor improvements

* Simplified file loading logic in `gjson.LoadPath` by removing caching
and directly reading file bytes, which streamlines the code and avoids
potential cache issues.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
John Guo
2026-01-20 19:25:23 +08:00
committed by GitHub
parent 102c3b6cb0
commit f3f2cb3c57
8 changed files with 239 additions and 57 deletions

View File

@ -21,7 +21,7 @@ import (
"github.com/gogf/gf/v2/util/gconv"
)
// ContentType is the content type name of data that can be loaded, eg: json, xml, yaml.
//
// It is an alias of string, not a distinct type, so plain string values
// (for example a file extension) can be assigned to it, and it can be passed
// to string helpers, without any conversion.
type ContentType = string
const (
ContentTypeJSON ContentType = `json`
@ -35,23 +35,40 @@ const (
)
const (
	// defaultSplitChar is the separator char for hierarchical data access.
	defaultSplitChar = '.'
)
// Json is the customized JSON struct.
type Json struct {
	mu rwmutex.RWMutex
	// p is the pointer for hierarchical data access, it's the root of data in default.
	p *any
	// c is the char separator ('.' in default).
	c byte
	// vc is the Violence Check switch (false in default),
	// which is used to access data when the hierarchical data key contains separator char.
	vc bool
}
// Options for Json object creating/loading.
type Options struct {
	// Safe marks the object as intended for concurrent-safe usage.
	// This is especially for Json object creating.
	Safe bool
	// Tags specifies custom priority tags for decoding, eg: "json,yaml,MyTag".
	// This is especially for struct parsing into Json object.
	Tags string
	// Type specifies the data content type, eg: json, xml, yaml, toml, ini.
	Type ContentType
	// StrNumber causes the Decoder to unmarshal a number into an any as a string instead of as a float64.
	// This is especially for json content parsing into Json object.
	StrNumber bool
}
// iInterfaces is used for type assert api for Interfaces().

View File

@ -161,56 +161,86 @@ func loadContentWithOptions(data []byte, options Options) (*Json, error) {
if len(data) == 0 {
return NewWithOptions(nil, options), nil
}
if options.Type == "" {
options.Type, err = checkDataType(data)
var (
checkType ContentType
decodedData any
)
if options.Type != "" {
checkType = gstr.TrimLeft(options.Type, ".")
} else {
checkType, err = checkDataType(data)
if err != nil {
return nil, err
}
}
options.Type = ContentType(gstr.TrimLeft(
string(options.Type), "."),
)
switch options.Type {
switch checkType {
case ContentTypeJSON, ContentTypeJs:
decoder := json.NewDecoder(bytes.NewReader(data))
if options.StrNumber {
decoder.UseNumber()
}
if err = decoder.Decode(&result); err != nil {
return nil, err
}
switch result.(type) {
case string, []byte:
return nil, gerror.Newf(`json decoding failed for content: %s`, data)
}
return NewWithOptions(result, options), nil
case ContentTypeXML:
data, err = gxml.ToJson(data)
decodedData, err = gxml.Decode(data)
if err != nil {
return nil, err
}
return NewWithOptions(decodedData, options), nil
case ContentTypeYaml, ContentTypeYml:
data, err = gyaml.ToJson(data)
decodedData, err = gyaml.Decode(data)
if err != nil {
return nil, err
}
return NewWithOptions(decodedData, options), nil
case ContentTypeToml:
data, err = gtoml.ToJson(data)
decodedData, err = gtoml.Decode(data)
if err != nil {
return nil, err
}
return NewWithOptions(decodedData, options), nil
case ContentTypeIni:
data, err = gini.ToJson(data)
decodedData, err = gini.Decode(data)
if err != nil {
return nil, err
}
return NewWithOptions(decodedData, options), nil
case ContentTypeProperties:
data, err = gproperties.ToJson(data)
decodedData, err = gproperties.Decode(data)
if err != nil {
return nil, err
}
return NewWithOptions(decodedData, options), nil
default:
err = gerror.NewCodef(
gcode.CodeInvalidParameter,
`unsupported type "%s" for loading`,
options.Type,
)
}
if err != nil {
return nil, err
}
decoder := json.NewDecoder(bytes.NewReader(data))
if options.StrNumber {
decoder.UseNumber()
}
if err = decoder.Decode(&result); err != nil {
return nil, err
// Ignore some duplicated types, like js and yml,
// which do not need to be shown in the error message.
allSupportedTypes := []string{
ContentTypeJSON,
ContentTypeXML,
ContentTypeYaml,
ContentTypeToml,
ContentTypeIni,
ContentTypeProperties,
}
switch result.(type) {
case string, []byte:
return nil, gerror.Newf(`json decoding failed for content: %s`, data)
}
return NewWithOptions(result, options), nil
return nil, gerror.NewCodef(
gcode.CodeInvalidParameter,
`unsupported type "%s" for loading, all supported types: %s`,
options.Type, gstr.Join(allSupportedTypes, ", "),
)
}
// checkDataType automatically checks and returns the data type for `content`.
@ -247,33 +277,104 @@ func checkDataType(data []byte) (ContentType, error) {
}
}
// isXMLContent checks whether given content is XML format.
// The whole content, ignoring surrounding whitespace, must start with one
// `<...>` tag and end with another one, which a single regular expression
// is enough to identify.
func isXMLContent(data []byte) bool {
	const xmlPattern = `^\s*<.+>[\S\s]+<.+>\s*$`
	return gregex.IsMatch(xmlPattern, data)
}
// isYamlContent checks whether given content is YAML format.
//
// Content that holds a TOML multi-line string assignment is rejected first.
// Otherwise the content is reported as YAML as soon as one `key: value` pair
// is found, either at the very beginning of the content or at the beginning
// of a later line. The colon must be followed by at least one whitespace.
func isYamlContent(data []byte) bool {
	// TOML multi-line basic string, this should not be yaml format:
	//   key = """
	//   value
	//   """
	if gregex.IsMatch(`[\n\r]*[\s\t\w\-\."]+\s*=\s*"""[\s\S]+"""`, data) {
		return false
	}
	// TOML multi-line literal string, this should not be yaml format:
	//   "x.x" = '''
	//   y
	//   '''
	if gregex.IsMatch(`[\n\r]*[\s\t\w\-\."]+\s*=\s*'''[\s\S]+'''`, data) {
		return false
	}
	yamlPatterns := []string{
		// content starts with:
		//   x : "y"
		`^[\n\r]*[\w\-\s\t]+\s*:\s+".+"`,
		// content starts with:
		//   x : y
		`^[\n\r]*[\w\-\s\t]+\s*:\s+\w+`,
		// line starts with:
		//   x : "y"
		`[\n\r]+[\w\-\s\t]+\s*:\s+".+"`,
		// line starts with:
		//   x : y
		`[\n\r]+[\w\-\s\t]+\s*:\s+\w+`,
		// content starts with a quoted key:
		//   "x": "y"
		`^[\n\r]*".+":\s+".+"`,
		// line starts with a quoted key:
		//   "x": y
		`[\n\r]+".+":\s+\w+`,
	}
	for _, pattern := range yamlPatterns {
		if gregex.IsMatch(pattern, data) {
			return true
		}
	}
	return false
}
// isTomlContent checks whether given content is TOML format.
//
// Content is rejected when it contains a ';' comment or a dotted key at the
// beginning of a line. Otherwise it is accepted when at least one
// `key = value` or `key = "value"` assignment is found.
func isTomlContent(data []byte) bool {
	// content starts with:
	//   ; comment line
	if gregex.IsMatch(`^[\s\t\n\r]*;.+`, data) {
		return false
	}
	// line starts with:
	//   ; comment line
	// NOTE: the pattern is not anchored to a line start, so any ';' that
	// follows whitespace, even inside a value, rejects the content as well.
	if gregex.IsMatch(`[\s\t\n\r]+;.+`, data) {
		return false
	}
	// line (other than the very first one) starts with,
	// this should not be toml format:
	//   key.with.dot = value
	if gregex.IsMatch(`[\n\r]+[\s\t\w\-]+\.[\s\t\w\-]+\s*=\s*.+`, data) {
		return false
	}
	// line starts with:
	//   key = value
	//   key = "value"
	//   "key" = "value"
	//   "key" = value
	return gregex.IsMatch(`[\n\r]*[\s\t\w\-\."]+\s*=\s*".+"`, data) ||
		gregex.IsMatch(`[\n\r]*[\s\t\w\-\."]+\s*=\s*\w+`, data)
}
// isIniContent checks whether given content is INI format.
//
// The content must contain at least one section header like `[section]`
// and at least one `key = value` or `key = "value"` assignment.
func isIniContent(data []byte) bool {
	// ini format must have sections like: [section]
	if !gregex.IsMatch(`\[[\w\.]+\]`, data) {
		return false
	}
	// line starts with:
	//   key = "value"
	//   key = value
	return gregex.IsMatch(`[\n\r]*[\s\t\w\-\."]+\s*=\s*".+"`, data) ||
		gregex.IsMatch(`[\n\r]*[\s\t\w\-\."]+\s*=\s*\w+`, data)
}
// isPropertyContent checks whether given content is Properties format.
//
// The content is accepted when at least one assignment whose value starts
// with a word character is found.
func isPropertyContent(data []byte) bool {
	// line starts with:
	//   key = value
	//   "key" = value
	return gregex.IsMatch(`[\n\r]*[\s\t\w\-\."]+\s*=\s*\w+`, data)
}

View File

@ -29,7 +29,7 @@ func LoadPath(path string, options Options) (*Json, error) {
path = p
}
if options.Type == "" {
options.Type = ContentType(gfile.Ext(path))
options.Type = gfile.Ext(path)
}
return loadContentWithOptions(gfile.GetBytesWithCache(path), options)
return loadContentWithOptions(gfile.GetBytes(path), options)
}

View File

@ -418,3 +418,13 @@ DBINFO.password=password
t.AssertNE(err, nil)
})
}
// Test_Load_YAML_For_I18n loads the i18n YAML test data through LoadContent
// without specifying a content type, and verifies that a quoted key which
// contains the separator char can be read back.
func Test_Load_YAML_For_I18n(t *testing.T) {
	content := []byte(gtest.DataContent("yaml", "i18n-issue.yaml"))
	gtest.C(t, func(t *gtest.T) {
		loaded, loadErr := gjson.LoadContent(content)
		t.AssertNil(loadErr)

		// The key itself contains '.', so violence check is enabled before reading it.
		loaded.SetViolenceCheck(true)
		got := loaded.Get("resourceUsage.workflow").String()
		t.Assert(got, "workflow")
	})
}

View File

@ -0,0 +1,16 @@
"environment status is Creating/Updating, please wait for sync to complete": "环境当前状为创建中/更新中,请等待同步完成"
"There are still queues in the current environment, please ensure there are no queues before deletion": "当前环境还存在队列,确保环境没有队列再删除"
"the current repository has associated environments in use, please ensure no environment associations before deleting the repository": "当前仓库有关联环境正在使用,请确保没有环境关联再删除该仓库"
"There are environments using this cluster, please ensure all environments have been deleted before deleting the cluster": "当前集群存在环境正在使用,请确保所有环境已经删除再删除该集群"
"shareStrategy.Init": "未拆卡"
"shareStrategy.Pending": "切分中"
"shareStrategy.Success": "拆卡成功"
"shareStrategy.Canceling": "拆卡取消中"
"shareStrategy.unknown": "未知状态"
"resourceUsage.none": "无"
"resourceUsage.inference": "推理"
"resourceUsage.training": "训练"
"resourceUsage.workflow": "workflow"
"resourceUsage.hybrid": "混合"
"resourceUsage.unknown": "unknown"

View File

@ -298,7 +298,7 @@ func (m *Manager) init(ctx context.Context) {
if m.data[lang] == nil {
m.data[lang] = make(map[string]string)
}
if j, err := gjson.LoadContent(gfile.GetBytes(file)); err == nil {
if j, err := gjson.LoadPath(file, gjson.Options{}); err == nil {
for k, v := range j.Var().Map() {
m.data[lang][k] = gconv.String(v)
}

View File

@ -259,3 +259,25 @@ func Test_PathInNormal(t *testing.T) {
t.Assert(i18n.T(context.Background(), "{#lang}"), "en-US")
})
}
// Test_Issue_Yaml copies the YAML i18n test data into a "manifest/i18n"
// folder next to this test file, then verifies that a translation key
// containing a dot is resolved by a new i18n manager for language "zh".
func Test_Issue_Yaml(t *testing.T) {
	var (
		manifestDir = gfile.Join(gdebug.CallerDirectory(), "manifest")
		i18nDir     = gfile.Join(gdebug.CallerDirectory(), "manifest/i18n")
	)
	// Copy i18n files next to the test file.
	copyErr := gfile.CopyDir(gtest.DataPath("issue-yaml"), i18nDir)
	// Register the cleanup before asserting, so the copied files are removed
	// even when the copy reported an error.
	defer gfile.RemoveAll(manifestDir)
	gtest.AssertNil(copyErr)

	manager := gi18n.New()
	gtest.C(t, func(t *gtest.T) {
		manager.SetLanguage("zh")
		translated := manager.T(context.Background(), "{#resourceUsage.workflow}")
		t.Assert(translated, "workflow")
	})
}

16
i18n/gi18n/testdata/issue-yaml/zh.yaml vendored Normal file
View File

@ -0,0 +1,16 @@
"environment status is Creating/Updating, please wait for sync to complete": "环境当前状为创建中/更新中,请等待同步完成"
"There are still queues in the current environment, please ensure there are no queues before deletion": "当前环境还存在队列,确保环境没有队列再删除"
"the current repository has associated environments in use, please ensure no environment associations before deleting the repository": "当前仓库有关联环境正在使用,请确保没有环境关联再删除该仓库"
"There are environments using this cluster, please ensure all environments have been deleted before deleting the cluster": "当前集群存在环境正在使用,请确保所有环境已经删除再删除该集群"
"shareStrategy.Init": "未拆卡"
"shareStrategy.Pending": "切分中"
"shareStrategy.Success": "拆卡成功"
"shareStrategy.Canceling": "拆卡取消中"
"shareStrategy.unknown": "未知状态"
"resourceUsage.none": "无"
"resourceUsage.inference": "推理"
"resourceUsage.training": "训练"
"resourceUsage.workflow": "workflow"
"resourceUsage.hybrid": "混合"
"resourceUsage.unknown": "unknown"