mirror of
https://gitee.com/johng/gf
synced 2026-07-04 13:02:36 +08:00
455 lines
14 KiB
Go
Executable File
455 lines
14 KiB
Go
Executable File
// Unmarshal arbitrary XML docs to map[string]interface{} or JSON and extract values (using wildcards, if necessary).
|
|
// Copyright 2012-2018 Charles Banning. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file
|
|
|
|
/*
|
|
Unmarshal dynamic / arbitrary XML docs and extract values (using wildcards, if necessary).
|
|
THIS IS ONLY PROVIDED TO FACILITATE MIGRATING TO "mxj" PACKAGE FROM "x2j" PACKAGE.
|
|
|
|
NOTICE: 03mar18, package mostly replicates github.com/clbanning/x2j using github.com/clbanning/mxj
|
|
(Note: there is no concept of Node or Tree; only direct decoding to map[string]interface{}.)
|
|
|
|
One useful function is:
|
|
|
|
- Unmarshal(doc []byte, v interface{}) error
|
|
where v is a pointer to a variable of type 'map[string]interface{}', 'string', or
|
|
any other type supported by xml.Unmarshal().
|
|
|
|
To retrieve a value for specific tag use:
|
|
|
|
- DocValue(doc, path string, attrs ...string) (interface{},error)
|
|
- MapValue(m map[string]interface{}, path string, attr map[string]interface{}, recast ...bool) (interface{}, error)
|
|
|
|
The 'path' argument is a period-separated tag hierarchy - also known as dot-notation.
|
|
It is the program's responsibility to cast the returned value to the proper type; possible
|
|
types are the normal JSON unmarshaling types: string, float64, bool, []interface{}, map[string]interface{}.
|
|
|
|
To retrieve all values associated with a tag occurring anywhere in the XML document use:
|
|
|
|
- ValuesForTag(doc, tag string) ([]interface{}, error)
|
|
- ValuesForKey(m map[string]interface{}, key string) []interface{}
|
|
|
|
Demos: http://play.golang.org/p/m8zP-cpk0O
|
|
http://play.golang.org/p/cIteTS1iSg
|
|
http://play.golang.org/p/vd8pMiI21b
|
|
|
|
Returned values should be one of map[string]interface{}, []interface{}, or string.
|
|
|
|
All the values associated with a tag-path that may include one or more wildcard characters -
|
|
'*' - can also be retrieved using:
|
|
|
|
- ValuesFromTagPath(doc, path string, getAttrs ...bool) ([]interface{}, error)
|
|
- ValuesFromKeyPath(map[string]interface{}, path string, getAttrs ...bool) []interface{}
|
|
|
|
Demos: http://play.golang.org/p/kUQnZ8VuhS
|
|
http://play.golang.org/p/l1aMHYtz7G
|
|
|
|
NOTE: care should be taken when using "*" at the end of a path - i.e., "books.book.*". See
|
|
the x2jpath_test.go case on how the wildcard returns all key values and collapses list values;
|
|
the same message structure can load a []interface{} or a map[string]interface{} (or an interface{})
|
|
value for a tag.
|
|
|
|
See the test cases in "x2jpath_test.go" and programs in "example" subdirectory for more.
|
|
|
|
XML PARSING CONVENTIONS
|
|
|
|
- Attributes are parsed to map[string]interface{} values by prefixing a hyphen, '-',
|
|
to the attribute label.
|
|
- If the element is a simple element and has attributes, the element value
|
|
is given the key '#text' for its map[string]interface{} representation. (See
|
|
the 'atomFeedString.xml' test data, below.)
|
|
|
|
io.Reader HANDLING
|
|
|
|
ToMap(), ToJson(), and ToJsonIndent() provide parsing of messages from an io.Reader.
|
|
If you want to handle a message stream, look at XmlMsgsFromReader().
|
|
|
|
NON-UTF8 CHARACTER SETS
|
|
|
|
Use the X2jCharsetReader variable to assign io.Reader for alternative character sets.
|
|
|
|
*/
|
|
package x2j
|
|
|
|
import (
	"bytes"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"sort"
	"strconv"
	"strings"

	"github.com/gogf/gf/third/github.com/clbanning/mxj"
)
|
|
|
|
// X2jCharsetReader, if non-nil, is used to decode a doc or stream that is not
// UTF-8 encoded. It has the same signature as xml.Decoder.CharsetReader.
//
// In this file it is installed on the xml.Decoder that Unmarshal creates for
// targets other than *map[string]interface{} and *string.
// NOTE(review): the map/JSON helpers in this file delegate to mxj and do not
// visibly pass this reader on - confirm how mxj is configured before relying
// on it for calls such as DocToJson.
//
// Example:
//
//	import charset "code.google.com/p/go-charset/charset"
//	...
//	x2j.X2jCharsetReader = charset.NewReader
//	s, err := x2j.DocToJson(doc)
var X2jCharsetReader func(charset string, input io.Reader) (io.Reader, error)
|
|
|
|
// DocToJson - return an XML doc as a JSON string.
|
|
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
|
|
func DocToJson(doc string, recast ...bool) (string, error) {
|
|
var r bool
|
|
if len(recast) == 1 {
|
|
r = recast[0]
|
|
}
|
|
m, merr := mxj.NewMapXml([]byte(doc), r)
|
|
if m == nil || merr != nil {
|
|
return "", merr
|
|
}
|
|
|
|
b, berr := m.Json()
|
|
if berr != nil {
|
|
return "", berr
|
|
}
|
|
|
|
// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
|
|
return string(b), nil
|
|
}
|
|
|
|
// DocToJsonIndent - return an XML doc as a prettified JSON string.
|
|
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
|
|
// Note: recasting is only applied to element values, not attribute values.
|
|
func DocToJsonIndent(doc string, recast ...bool) (string, error) {
|
|
var r bool
|
|
if len(recast) == 1 {
|
|
r = recast[0]
|
|
}
|
|
m, merr := mxj.NewMapXml([]byte(doc), r)
|
|
if m == nil || merr != nil {
|
|
return "", merr
|
|
}
|
|
|
|
b, berr := m.JsonIndent("", " ")
|
|
if berr != nil {
|
|
return "", berr
|
|
}
|
|
|
|
// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
|
|
return string(b), nil
|
|
}
|
|
|
|
// DocToMap - convert an XML doc into a map[string]interface{}.
|
|
// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
|
|
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
|
|
// Note: recasting is only applied to element values, not attribute values.
|
|
func DocToMap(doc string, recast ...bool) (map[string]interface{}, error) {
|
|
var r bool
|
|
if len(recast) == 1 {
|
|
r = recast[0]
|
|
}
|
|
return mxj.NewMapXml([]byte(doc), r)
|
|
}
|
|
|
|
// WriteMap - dumps the map[string]interface{} for examination.
// 'offset' is initial indentation count; typically: WriteMap(m).
// Only a call with exactly one 'offset' value sets the indentation; otherwise it is 0.
// NOTE: with XML all element types are 'string'.
// But code written as generic for use with maps[string]interface{} values from json.Unmarshal().
// Or it can handle a DocToMap(doc,true) result where values have been recast'd.
//
// Supported values are nil, string, float64, bool, []interface{} and
// map[string]interface{}; anything else is rendered as "unknown type for: <value>".
// Map entries are written in sorted key order so the dump is reproducible.
func WriteMap(m interface{}, offset ...int) string {
	var indent int
	if len(offset) == 1 {
		indent = offset[0]
	}

	// One space per indentation level; a non-positive count means no padding
	// (strings.Repeat would panic on a negative count).
	var pad string
	if indent > 0 {
		pad = strings.Repeat(" ", indent)
	}

	switch mv := m.(type) {
	case nil:
		return "[nil] nil"
	case string:
		return "[string] " + mv
	case float64:
		return "[float64] " + strconv.FormatFloat(mv, 'e', 2, 64)
	case bool:
		return "[bool] " + strconv.FormatBool(mv)
	case []interface{}:
		var sb strings.Builder
		sb.WriteString("[[]interface{}]")
		for i, v := range mv {
			sb.WriteString("\n")
			sb.WriteString(pad)
			sb.WriteString("[item: " + strconv.Itoa(i) + "]")
			// Scalars go on their own line under the item header; lists and
			// maps supply their own leading text.
			switch v.(type) {
			case string, float64, bool:
				sb.WriteString("\n")
			}
			sb.WriteString(pad)
			sb.WriteString(WriteMap(v, indent+1))
		}
		return sb.String()
	case map[string]interface{}:
		// Go map iteration order is randomized; sort for stable output.
		keys := make([]string, 0, len(mv))
		for k := range mv {
			keys = append(keys, k)
		}
		sort.Strings(keys)

		var sb strings.Builder
		for _, k := range keys {
			sb.WriteString("\n")
			sb.WriteString(pad)
			sb.WriteString(k + " :" + WriteMap(mv[k], indent+1))
		}
		return sb.String()
	default:
		// shouldn't ever be here ...
		return fmt.Sprintf("unknown type for: %v", m)
	}
}
|
|
|
|
// ------------------------ value extraction from XML doc --------------------------
|
|
|
|
// DocValue - return a value for a specific tag
|
|
// 'doc' is a valid XML message.
|
|
// 'path' is a hierarchy of XML tags, e.g., "doc.name".
|
|
// 'attrs' is an OPTIONAL list of "name:value" pairs for attributes.
|
|
// Note: 'recast' is not enabled here. Use DocToMap(), NewAttributeMap(), and MapValue() calls for that.
|
|
func DocValue(doc, path string, attrs ...string) (interface{}, error) {
|
|
m, err := mxj.NewMapXml([]byte(doc), false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
a, err := NewAttributeMap(attrs...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
v, verr := MapValue(m, path, a)
|
|
if verr != nil {
|
|
return nil, verr
|
|
}
|
|
return v, nil
|
|
}
|
|
|
|
// MapValue - retrieves value based on walking the map, 'm'.
|
|
// 'm' is the map value of interest.
|
|
// 'path' is a period-separated hierarchy of keys in the map.
|
|
// 'attr' is a map of attribute "name:value" pairs from NewAttributeMap(). May be 'nil'.
|
|
// If the path can't be traversed, an error is returned.
|
|
// Note: the optional argument 'r' can be used to coerce attribute values, 'attr', if done so for 'm'.
|
|
func MapValue(m map[string]interface{}, path string, attr map[string]interface{}, r ...bool) (interface{}, error) {
|
|
// attribute values may have been recasted during map construction; default is 'false'.
|
|
if len(r) == 1 && r[0] == true {
|
|
for k, v := range attr {
|
|
attr[k] = recast(v.(string), true)
|
|
}
|
|
}
|
|
|
|
// parse the path
|
|
keys := strings.Split(path, ".")
|
|
|
|
// initialize return value to 'm' so a path of "" will work correctly
|
|
var v interface{} = m
|
|
var ok bool
|
|
var okey string
|
|
var isMap bool = true
|
|
if keys[0] == "" && len(attr) == 0 {
|
|
return v, nil
|
|
}
|
|
for _, key := range keys {
|
|
if !isMap {
|
|
return nil, errors.New("no keys beyond: " + okey)
|
|
}
|
|
if v, ok = m[key]; !ok {
|
|
return nil, errors.New("no key in map: " + key)
|
|
} else {
|
|
switch v.(type) {
|
|
case map[string]interface{}:
|
|
m = v.(map[string]interface{})
|
|
isMap = true
|
|
default:
|
|
isMap = false
|
|
}
|
|
}
|
|
// save 'key' for error reporting
|
|
okey = key
|
|
}
|
|
|
|
// match attributes; value is "#text" or nil
|
|
if attr == nil {
|
|
return v, nil
|
|
}
|
|
return hasAttributes(v, attr)
|
|
}
|
|
|
|
// recast - try to cast string values to bool or float64
// When 'r' is false, or 's' parses as neither, 's' is returned unchanged.
func recast(s string, r bool) interface{} {
	if !r {
		return s
	}
	// handle numeric strings ahead of boolean
	if f, err := strconv.ParseFloat(s, 64); err == nil {
		return f
	}
	// ParseBool treats "1"==true & "0"==false, but those are consumed by the
	// float check above, so only the textual forms reach this point.
	if b, err := strconv.ParseBool(s); err == nil {
		return b
	}
	return s
}
|
|
|
|
// hasAttributes() - interface{} equality works for string, float64, bool
//
// It checks 'v' against the attribute name/value pairs in 'a':
//   - for a []interface{}, the result for the first member that matches is
//     returned;
//   - for a map[string]interface{}, every pair in 'a' must be present with an
//     equal value; on success the "#text" entry is returned if there is one,
//     otherwise the map itself (the element holds child elements rather than
//     plain text);
//   - any other value cannot carry attributes and is an error.
func hasAttributes(v interface{}, a map[string]interface{}) (interface{}, error) {
	switch elem := v.(type) {
	case []interface{}:
		// run through all entries looking for one with matching attributes
		for _, member := range elem {
			if found, err := hasAttributes(member, a); err == nil {
				return found, nil
			}
		}
		return nil, errors.New("no list member with matching attributes")
	case map[string]interface{}:
		// do all attribute name:value pairs match?
		for key, want := range a {
			// Attribute keys carry a leading hyphen; strip it for messages
			// without slicing, so an empty or unprefixed key cannot panic
			// or lose its first character.
			name := strings.TrimPrefix(key, "-")
			got, ok := elem[key]
			if !ok {
				return nil, errors.New("no attribute with name: " + name)
			}
			if want != got {
				return nil, fmt.Errorf("no attribute key:value pair: %s:%v", name, want)
			}
		}
		// they all match; so return value associated with "#text" key.
		if text, ok := elem["#text"]; ok {
			return text, nil
		}
		// this happens when another element is value of tag rather than just a string value
		return elem, nil
	}
	return nil, errors.New("no match for attributes")
}
|
|
|
|
// NewAttributeMap() - generate map of attributes=value entries as map["-"+name]value,
// returned as a map[string]interface{} whose values are strings.
// 'kv' arguments are "name:value" pairs that appear as attributes, name="value".
// Each argument must contain exactly one ':'; anything else is an error.
// If len(kv) == 0, the return is (nil, nil).
func NewAttributeMap(kv ...string) (map[string]interface{}, error) {
	if len(kv) == 0 {
		return nil, nil
	}
	m := make(map[string]interface{}, len(kv))
	for _, pair := range kv {
		parts := strings.Split(pair, ":")
		if len(parts) != 2 {
			return nil, errors.New("attribute not \"name:value\" pair: " + pair)
		}
		// attributes are stored as keys prepended with hyphen
		m["-"+parts[0]] = parts[1]
	}
	return m, nil
}
|
|
|
|
//------------------------- get values for key ----------------------------
|
|
|
|
// ValuesForTag - return all values in doc associated with 'tag'.
|
|
// Returns nil if the 'tag' does not occur in the doc.
|
|
// If there is an error encounted while parsing doc, that is returned.
|
|
// If you want values 'recast' use DocToMap() and ValuesForKey().
|
|
func ValuesForTag(doc, tag string) ([]interface{}, error) {
|
|
m, err := mxj.NewMapXml([]byte(doc))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return ValuesForKey(m, tag), nil
|
|
}
|
|
|
|
// ValuesForKey - return all values in map associated with 'key'
|
|
// Returns nil if the 'key' does not occur in the map
|
|
func ValuesForKey(m map[string]interface{}, key string) []interface{} {
|
|
ret := make([]interface{}, 0)
|
|
|
|
hasKey(m, key, &ret)
|
|
if len(ret) > 0 {
|
|
return ret
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// hasKey - if the map 'key' exists append it to array
// if it doesn't do nothing except scan array and map values
//
// 'iv' is searched recursively: a matching entry in a map is appended to
// '*ret' before that map's values (including the matched one) are scanned.
// Values that are neither map[string]interface{} nor []interface{} are ignored.
func hasKey(iv interface{}, key string, ret *[]interface{}) {
	switch node := iv.(type) {
	case map[string]interface{}:
		if v, ok := node[key]; ok {
			*ret = append(*ret, v)
		}
		for _, v := range node {
			hasKey(v, key, ret)
		}
	case []interface{}:
		for _, v := range node {
			hasKey(v, key, ret)
		}
	}
}
|
|
|
|
// ======== 2013.07.01 - x2j.Unmarshal, wraps xml.Unmarshal ==============
|
|
|
|
// Unmarshal - wraps xml.Unmarshal with handling of map[string]interface{}
|
|
// and string type variables.
|
|
// Usage: x2j.Unmarshal(doc,&m) where m of type map[string]interface{}
|
|
// x2j.Unmarshal(doc,&s) where s of type string (Overrides xml.Unmarshal().)
|
|
// x2j.Unmarshal(doc,&struct) - passed to xml.Unmarshal()
|
|
// x2j.Unmarshal(doc,&slice) - passed to xml.Unmarshal()
|
|
func Unmarshal(doc []byte, v interface{}) error {
|
|
switch v.(type) {
|
|
case *map[string]interface{}:
|
|
m, err := mxj.NewMapXml(doc)
|
|
vv := *v.(*map[string]interface{})
|
|
for k, v := range m {
|
|
vv[k] = v
|
|
}
|
|
return err
|
|
case *string:
|
|
s, err := ByteDocToJson(doc)
|
|
*(v.(*string)) = s
|
|
return err
|
|
default:
|
|
b := bytes.NewBuffer(doc)
|
|
p := xml.NewDecoder(b)
|
|
p.CharsetReader = X2jCharsetReader
|
|
return p.Decode(v)
|
|
// return xml.Unmarshal(doc, v)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ByteDocToJson - return an XML doc as a JSON string.
|
|
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
|
|
func ByteDocToJson(doc []byte, recast ...bool) (string, error) {
|
|
var r bool
|
|
if len(recast) == 1 {
|
|
r = recast[0]
|
|
}
|
|
m, merr := mxj.NewMapXml(doc, r)
|
|
if m == nil || merr != nil {
|
|
return "", merr
|
|
}
|
|
|
|
b, berr := m.Json()
|
|
if berr != nil {
|
|
return "", berr
|
|
}
|
|
|
|
// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
|
|
return string(b), nil
|
|
}
|
|
|
|
// ByteDocToMap - convert an XML doc into a map[string]interface{}.
|
|
// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
|
|
// If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
|
|
// Note: recasting is only applied to element values, not attribute values.
|
|
func ByteDocToMap(doc []byte, recast ...bool) (map[string]interface{}, error) {
|
|
var r bool
|
|
if len(recast) == 1 {
|
|
r = recast[0]
|
|
}
|
|
return mxj.NewMapXml(doc, r)
|
|
}
|