zorg/vendor/github.com/mmcdole/gofeed/internal/shared/extparser.go

177 lines
6.9 KiB
Go

package shared
import (
"strings"
"github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/goxpp"
)
// IsExtension returns whether or not the current
// XML element is an extension element (if it has a
// non empty prefix)
func IsExtension(p *xpp.XMLPullParser) bool {
space := strings.TrimSpace(p.Space)
if prefix, ok := p.Spaces[space]; ok {
return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content")
}
return p.Space != ""
}
// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
prefix := prefixForNamespace(p.Space, p)
result, err := parseExtensionElement(p)
if err != nil {
return nil, err
}
// Ensure the extension prefix map exists
if _, ok := fe[prefix]; !ok {
fe[prefix] = map[string][]ext.Extension{}
}
// Ensure the extension element slice exists
if _, ok := fe[prefix][p.Name]; !ok {
fe[prefix][p.Name] = []ext.Extension{}
}
fe[prefix][p.Name] = append(fe[prefix][p.Name], result)
return fe, nil
}
func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
if err = p.Expect(xpp.StartTag, "*"); err != nil {
return e, err
}
e.Name = p.Name
e.Children = map[string][]ext.Extension{}
e.Attrs = map[string]string{}
for _, attr := range p.Attrs {
// TODO: Alright that we are stripping
// namespace information from attributes ?
e.Attrs[attr.Name.Local] = attr.Value
}
for {
tok, err := p.Next()
if err != nil {
return e, err
}
if tok == xpp.EndTag {
break
}
if tok == xpp.StartTag {
child, err := parseExtensionElement(p)
if err != nil {
return e, err
}
if _, ok := e.Children[child.Name]; !ok {
e.Children[child.Name] = []ext.Extension{}
}
e.Children[child.Name] = append(e.Children[child.Name], child)
} else if tok == xpp.Text {
e.Value += p.Text
}
}
e.Value = strings.TrimSpace(e.Value)
if err = p.Expect(xpp.EndTag, e.Name); err != nil {
return e, err
}
return e, nil
}
func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
// First we check if the global namespace map
// contains an entry for this namespace/prefix.
// This way we can use the canonical prefix for this
// ns instead of the one defined in the feed.
if prefix, ok := canonicalNamespaces[space]; ok {
return prefix
}
// Next we check if the feed itself defined this
// this namespace and return it if we have a result.
if prefix, ok := p.Spaces[space]; ok {
return prefix
}
// Lastly, any namespace which is not defined in the
// the feed will be the prefix itself when using Go's
// xml.Decoder.Token() method.
return space
}
// Namespaces taken from github.com/kurtmckee/feedparser
// These are used for determining canonical name space prefixes
// for many of the popular RSS/Atom extensions.
//
// These canonical prefixes override any prefixes used in the feed itself.
var canonicalNamespaces = map[string]string{
"http://webns.net/mvcb/": "admin",
"http://purl.org/rss/1.0/modules/aggregation/": "ag",
"http://purl.org/rss/1.0/modules/annotate/": "annotate",
"http://media.tangent.org/rss/1.0/": "audio",
"http://backend.userland.com/blogChannelModule": "blogChannel",
"http://creativecommons.org/ns#license": "cc",
"http://web.resource.org/cc/": "cc",
"http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": "creativeCommons",
"http://backend.userland.com/creativeCommonsRssModule": "creativeCommons",
"http://purl.org/rss/1.0/modules/company": "co",
"http://purl.org/rss/1.0/modules/content/": "content",
"http://my.theinfo.org/changed/1.0/rss/": "cp",
"http://purl.org/dc/elements/1.1/": "dc",
"http://purl.org/dc/terms/": "dcterms",
"http://purl.org/rss/1.0/modules/email/": "email",
"http://purl.org/rss/1.0/modules/event/": "ev",
"http://rssnamespace.org/feedburner/ext/1.0": "feedburner",
"http://freshmeat.net/rss/fm/": "fm",
"http://xmlns.com/foaf/0.1/": "foaf",
"http://www.w3.org/2003/01/geo/wgs84_pos#": "geo",
"http://www.georss.org/georss": "georss",
"http://www.opengis.net/gml": "gml",
"http://postneo.com/icbm/": "icbm",
"http://purl.org/rss/1.0/modules/image/": "image",
"http://www.itunes.com/DTDs/PodCast-1.0.dtd": "itunes",
"http://example.com/DTDs/PodCast-1.0.dtd": "itunes",
"http://purl.org/rss/1.0/modules/link/": "l",
"http://search.yahoo.com/mrss": "media",
"http://search.yahoo.com/mrss/": "media",
"http://madskills.com/public/xml/rss/module/pingback/": "pingback",
"http://prismstandard.org/namespaces/1.2/basic/": "prism",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
"http://www.w3.org/2000/01/rdf-schema#": "rdfs",
"http://purl.org/rss/1.0/modules/reference/": "ref",
"http://purl.org/rss/1.0/modules/richequiv/": "reqv",
"http://purl.org/rss/1.0/modules/search/": "search",
"http://purl.org/rss/1.0/modules/slash/": "slash",
"http://schemas.xmlsoap.org/soap/envelope/": "soap",
"http://purl.org/rss/1.0/modules/servicestatus/": "ss",
"http://hacks.benhammersley.com/rss/streaming/": "str",
"http://purl.org/rss/1.0/modules/subscription/": "sub",
"http://purl.org/rss/1.0/modules/syndication/": "sy",
"http://schemas.pocketsoap.com/rss/myDescModule/": "szf",
"http://purl.org/rss/1.0/modules/taxonomy/": "taxo",
"http://purl.org/rss/1.0/modules/threading/": "thr",
"http://purl.org/rss/1.0/modules/textinput/": "ti",
"http://madskills.com/public/xml/rss/module/trackback/": "trackback",
"http://wellformedweb.org/commentAPI/": "wfw",
"http://purl.org/rss/1.0/modules/wiki/": "wiki",
"http://www.w3.org/1999/xhtml": "xhtml",
"http://www.w3.org/1999/xlink": "xlink",
"http://www.w3.org/XML/1998/namespace": "xml",
"http://podlove.org/simple-chapters": "psc",
}