// wrapper for non-node envs
- ;(function (sax) {
// Public entry points attached to the export object.
sax.parser = function (strict, opt) {
  return new SAXParser(strict, opt);
};
sax.SAXParser = SAXParser;
sax.SAXStream = SAXStream;
sax.createStream = createStream;

// Buffer overrun checks begin once the parser position passes
// MAX_BUFFER_LENGTH. Every check schedules the following one for
// MAX_BUFFER_LENGTH - (max(buffer lengths)), the earliest point at which an
// overrun could occur, so checks are as rare as possible yet frequent enough
// that the bound is never crossed.
// Buffers are tested at most once per write(), so handing write() a very
// large string may, in the extreme case, create one complete copy of that
// string. The caller controls chunk size, so this is assumed to be safe.
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024;

// Names of the parser properties that accumulate text while parsing.
var buffers = [
  "comment",
  "sgmlDecl",
  "textNode",
  "tagName",
  "doctype",
  "procInstName",
  "procInstBody",
  "entity",
  "attribName",
  "attribValue",
  "cdata",
  "script"
];

// Event names, exposed for discoverability.
sax.EVENTS = [
  "text",
  "processinginstruction",
  "sgmldeclaration",
  "doctype",
  "comment",
  "attribute",
  "opentag",
  "closetag",
  "opencdata",
  "cdata",
  "closecdata",
  "error",
  "end",
  "ready",
  "script",
  "opennamespace",
  "closenamespace"
];
/**
 * Parser state holder. Works with or without `new`; end() also re-invokes it
 * on an existing instance to reset that instance.
 *
 * @param {*} strict - truthy enables strict mode (stored as a boolean)
 * @param {Object} [opt] - options; the same object is kept as `parser.opt`
 *   and its `lowercase` property is filled in from `lowercasetags`
 */
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt);
  }

  var self = this;
  clearBuffers(self);
  self.c = "";
  self.q = "";
  self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;

  var options = opt || {};
  self.opt = options;
  options.lowercase = options.lowercase || options.lowercasetags;
  // name of the String method used to normalise names in non-strict mode
  self.looseCase = options.lowercase ? "toLowerCase" : "toUpperCase";

  self.tags = [];
  self.sawRoot = false;
  self.closedRoot = false;
  self.closed = false;
  self.error = null;
  self.tag = null;
  self.strict = !!strict;
  self.noscript = !!(strict || options.noscript);
  self.state = S.BEGIN;
  // per-parser entity table that falls back to the shared defaults
  self.ENTITIES = Object.create(sax.ENTITIES);
  self.attribList = [];

  // Namespaces form a prototype chain: `ns` always belongs to the current
  // tag and inherits from the parent tag's bindings.
  if (options.xmlns) {
    self.ns = Object.create(rootNS);
  }

  // Position tracking is mostly used for error reporting.
  self.trackPosition = options.position !== false;
  if (self.trackPosition) {
    self.column = 0;
    self.line = 0;
    self.position = 0;
  }

  emit(self, "onready");
}
// Fallbacks for engines without these ES5 functions. Each one is installed
// only when the native function is missing.
if (!Object.create) {
  Object.create = function (proto) {
    function Surrogate () {
      this.__proto__ = proto;
    }
    Surrogate.prototype = proto;
    return new Surrogate();
  };
}

if (!Object.getPrototypeOf) {
  Object.getPrototypeOf = function (obj) {
    return obj.__proto__;
  };
}

if (!Object.keys) {
  Object.keys = function (obj) {
    var own = [];
    for (var key in obj) {
      if (obj.hasOwnProperty(key)) {
        own.push(key);
      }
    }
    return own;
  };
}
/**
 * Inspect every accumulation buffer, flush or report the ones that exceed
 * the limit, and schedule the next inspection.
 *
 * Oversized text, cdata and script buffers are emitted immediately so they
 * stop growing; any other oversized buffer is reported through error().
 *
 * @param {SAXParser} parser - parser whose buffers are inspected; its
 *   `bufferCheckPosition` is updated
 */
function checkBufferLength (parser) {
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10);
  var maxActual = 0;

  for (var i = 0, l = buffers.length; i < l; i++) {
    var name = buffers[i];
    var value = parser[name];
    // Buffers are not always strings: `doctype` is replaced by `true` once a
    // DOCTYPE has been seen. Counting such values as empty keeps the
    // arithmetic below from turning into NaN.
    var len = typeof value === "string" ? value.length : 0;

    if (len > maxAllowed) {
      // Text/cdata nodes can get big, and since they are buffered we can get
      // here under normal conditions. Emit the node now so that it at least
      // does not grow any further.
      switch (name) {
        case "textNode":
          closeText(parser);
          break;
        case "cdata":
          emitNode(parser, "oncdata", parser.cdata);
          parser.cdata = "";
          break;
        case "script":
          emitNode(parser, "onscript", parser.script);
          parser.script = "";
          break;
        default:
          error(parser, "Max buffer length exceeded: " + name);
      }
    }
    maxActual = Math.max(maxActual, len);
  }

  // Schedule the next check for the earliest possible buffer overrun.
  parser.bufferCheckPosition =
    (sax.MAX_BUFFER_LENGTH - maxActual) + parser.position;
}
/**
 * Reset every accumulation buffer listed in `buffers` to the empty string.
 *
 * @param {Object} parser - object whose buffer properties are reset
 */
function clearBuffers (parser) {
  var count = buffers.length;
  var idx = 0;
  while (idx < count) {
    parser[buffers[idx]] = "";
    idx += 1;
  }
}
/**
 * Emit whatever text, cdata and script content is currently buffered and
 * empty those buffers.
 *
 * @param {SAXParser} parser
 */
function flushBuffers (parser) {
  closeText(parser);

  var pending = [
    { buffer: "cdata", event: "oncdata" },
    { buffer: "script", event: "onscript" }
  ];
  for (var idx = 0; idx < pending.length; idx++) {
    var entry = pending[idx];
    if (parser[entry.buffer] === "") {
      continue;
    }
    emitNode(parser, entry.event, parser[entry.buffer]);
    parser[entry.buffer] = "";
  }
}
// Instance methods; each one delegates to a module-level function.
SAXParser.prototype = {
  // Finish parsing.
  end: function () {
    end(this);
  },

  write: write,

  // Clear a stored error so writing can continue; chainable.
  resume: function () {
    this.error = null;
    return this;
  },

  // Writing `null` is the signal that the input is complete.
  close: function () {
    return this.write(null);
  },

  // Emit any buffered text, cdata or script content right away.
  flush: function () {
    flushBuffers(this);
  }
};
// Use node's Stream base class when it can be loaded; otherwise fall back to
// an empty constructor so the stream wrapper below can still be defined.
var Stream;
try {
  Stream = require("stream").Stream;
} catch (ex) {
  Stream = function () {};
}
// Every event except "error" and "end"; SAXStream wires those two up itself.
var streamWraps = sax.EVENTS.filter(function (name) {
  return !(name === "error" || name === "end");
});
/**
 * Factory for SAXStream.
 *
 * @param {*} strict - passed through to the SAXStream constructor
 * @param {Object} [opt] - passed through to the SAXStream constructor
 * @returns {SAXStream}
 */
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt);
  return stream;
}
/**
 * Stream wrapper around a SAXParser. Works with or without `new`.
 *
 * Parser "end" and "error" callbacks are re-emitted as stream events, and an
 * `on<event>` accessor is defined for every name in `streamWraps`.
 *
 * @param {*} strict - forwarded to SAXParser
 * @param {Object} [opt] - forwarded to SAXParser
 */
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) {
    return new SAXStream(strict, opt);
  }
  Stream.apply(this);

  var stream = this;
  stream._parser = new SAXParser(strict, opt);
  stream.writable = true;
  stream.readable = true;

  stream._parser.onend = function () {
    stream.emit("end");
  };

  stream._parser.onerror = function (er) {
    stream.emit("error", er);
    // If emit did not throw, the error was handled: clear it so that the
    // parser can be written to again.
    stream._parser.error = null;
  };

  stream._decoder = null;

  streamWraps.forEach(function (ev) {
    Object.defineProperty(stream, "on" + ev, {
      configurable: false,
      enumerable: true,
      get: function () {
        return stream._parser["on" + ev];
      },
      set: function (h) {
        if (h) {
          stream.on(ev, h);
          return;
        }
        // A falsy handler removes all listeners and clears the parser hook.
        stream.removeAllListeners(ev);
        return stream._parser["on" + ev] = h;
      }
    });
  });
}
// Inherit from Stream while keeping `constructor` pointing at SAXStream.
SAXStream.prototype = Object.create(Stream.prototype, {
  constructor: { value: SAXStream }
});
/**
 * Feed a chunk to the underlying parser and re-emit it as a "data" event.
 * Buffer chunks are decoded as UTF-8 through a StringDecoder that is created
 * on first use and kept on the stream.
 *
 * @param {*} data - chunk to parse; its toString() result goes to the parser
 * @returns {boolean} always true
 */
SAXStream.prototype.write = function (data) {
  var isBuffer = typeof Buffer === 'function' &&
    typeof Buffer.isBuffer === 'function' &&
    Buffer.isBuffer(data);

  if (isBuffer) {
    if (!this._decoder) {
      var StringDecoder = require('string_decoder').StringDecoder;
      this._decoder = new StringDecoder('utf8');
    }
    data = this._decoder.write(data);
  }

  var text = data.toString();
  this._parser.write(text);
  this.emit("data", data);
  return true;
};
/**
 * Write an optional final chunk, then end the underlying parser.
 *
 * @param {*} [chunk] - written first when it is truthy and has a length
 * @returns {boolean} always true
 */
SAXStream.prototype.end = function (chunk) {
  var hasFinalChunk = chunk && chunk.length;
  if (hasFinalChunk) {
    this.write(chunk);
  }
  this._parser.end();
  return true;
};
/**
 * Register a listener. The first time a wrapped parser event is subscribed
 * to, a parser callback is installed that re-emits it on the stream.
 *
 * @param {string} ev - event name
 * @param {Function} handler - listener
 * @returns {*} whatever Stream.prototype.on returns
 */
SAXStream.prototype.on = function (ev, handler) {
  var stream = this;
  var hook = "on" + ev;

  if (!stream._parser[hook] && streamWraps.indexOf(ev) !== -1) {
    stream._parser[hook] = function () {
      // Forward as emit(ev, ...originalArguments).
      var forwarded = [ev];
      for (var idx = 0; idx < arguments.length; idx++) {
        forwarded.push(arguments[idx]);
      }
      stream.emit.apply(stream, forwarded);
    };
  }

  return Stream.prototype.on.call(stream, ev, handler);
};
// Character classes and tokens.
// The sets start out as plain strings and are turned into lookup objects
// further down, after the composite sets have been built from them.
var whitespace = "\r\n\t ";
// This really needs to be replaced with character classes:
// XML allows all manner of ridiculous numbers and digits.
var number = "0124356789";
var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
var quote = "'\"";
var entity = charClass(number + letter + "#");
var attribEnd = charClass(whitespace + ">");
var CDATA = "[CDATA[";
var DOCTYPE = "DOCTYPE";
var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE };

// Turn the remaining string character sets into character class objects.
whitespace = charClass(whitespace);
number = charClass(number);
letter = charClass(letter);
quote = charClass(quote);

// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// This implementation works on strings one character at a time, so it cannot
// support astral-plane characters (10000-EFFFF) without a significant
// breaking change to either this parser or the JavaScript language.
// Implementation of an emoji-capable xml parser is left as an exercise for
// the reader.
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/;
/**
 * Build a lookup object with one `true` entry per character of `str`.
 *
 * @param {string} str - characters that belong to the class
 * @returns {Object} map from single character to true
 */
function charClass (str) {
  var table = {};
  for (var idx = 0; idx < str.length; idx++) {
    table[str.charAt(idx)] = true;
  }
  return table;
}
/**
 * @param {*} c - value to inspect
 * @returns {boolean} true when `c` is a RegExp object
 */
function isRegExp (c) {
  var tag = Object.prototype.toString.call(c);
  return tag === '[object RegExp]';
}

/**
 * Test a character against a character class.
 *
 * @param {RegExp|Object} charclass - regular expression or lookup object
 * @param {string} c - character to test
 * @returns {*} a boolean for RegExp classes, otherwise the lookup result
 *   (true or undefined for objects built by charClass)
 */
function is (charclass, c) {
  if (isRegExp(charclass)) {
    return c.match(charclass) !== null;
  }
  return charclass[c];
}

/**
 * Negation of is().
 *
 * @param {RegExp|Object} charclass
 * @param {string} c
 * @returns {boolean}
 */
function not (charclass, c) {
  var member = is(charclass, c);
  return !member;
}
// Parser states, numbered consecutively from 0 in the order listed.
// `S` is the running counter while the table is being filled in.
var S = 0;
sax.STATE = {};
[
  "BEGIN",
  "TEXT",                  // general stuff
  "TEXT_ENTITY",           // &amp and such.
  "OPEN_WAKA",             // <
  "SGML_DECL",             // <!BLARG
  "SGML_DECL_QUOTED",      // <!BLARG foo "bar
  "DOCTYPE",               // <!DOCTYPE
  "DOCTYPE_QUOTED",        // <!DOCTYPE "//blah
  "DOCTYPE_DTD",           // <!DOCTYPE "//blah" [ ...
  "DOCTYPE_DTD_QUOTED",    // <!DOCTYPE "//blah" [ "foo
  "COMMENT_STARTING",      // <!-
  "COMMENT",               // <!--
  "COMMENT_ENDING",        // <!-- blah -
  "COMMENT_ENDED",         // <!-- blah --
  "CDATA",                 // <![CDATA[ something
  "CDATA_ENDING",          // ]
  "CDATA_ENDING_2",        // ]]
  "PROC_INST",             // <?hi
  "PROC_INST_BODY",        // <?hi there
  "PROC_INST_ENDING",      // <?hi "there" ?
  "OPEN_TAG",              // <strong
  "OPEN_TAG_SLASH",        // <strong /
  "ATTRIB",                // <a
  "ATTRIB_NAME",           // <a foo
  "ATTRIB_NAME_SAW_WHITE", // <a foo _
  "ATTRIB_VALUE",          // <a foo=
  "ATTRIB_VALUE_QUOTED",   // <a foo="bar
  "ATTRIB_VALUE_CLOSED",   // <a foo="bar"
  "ATTRIB_VALUE_UNQUOTED", // <a foo=bar
  "ATTRIB_VALUE_ENTITY_Q", // <foo bar="&quot;"
  "ATTRIB_VALUE_ENTITY_U", // <foo bar=&quot;
  "CLOSE_TAG",             // </a
  "CLOSE_TAG_SAW_WHITE",   // </a   >
  "SCRIPT",                // <script> ...
  "SCRIPT_ENDING"          // <script> ... <
].forEach(function (stateName) {
  sax.STATE[stateName] = S++;
});
// Default named entities. String values are the replacement text itself;
// numeric values are character codes that are converted to one-character
// strings right after the table.
sax.ENTITIES = {
  "amp": "&", "gt": ">", "lt": "<", "quot": "\"", "apos": "'",

  "AElig": 198, "Aacute": 193, "Acirc": 194, "Agrave": 192, "Aring": 197,
  "Atilde": 195, "Auml": 196, "Ccedil": 199, "ETH": 208, "Eacute": 201,
  "Ecirc": 202, "Egrave": 200, "Euml": 203, "Iacute": 205, "Icirc": 206,
  "Igrave": 204, "Iuml": 207, "Ntilde": 209, "Oacute": 211, "Ocirc": 212,
  "Ograve": 210, "Oslash": 216, "Otilde": 213, "Ouml": 214, "THORN": 222,
  "Uacute": 218, "Ucirc": 219, "Ugrave": 217, "Uuml": 220, "Yacute": 221,

  "aacute": 225, "acirc": 226, "aelig": 230, "agrave": 224, "aring": 229,
  "atilde": 227, "auml": 228, "ccedil": 231, "eacute": 233, "ecirc": 234,
  "egrave": 232, "eth": 240, "euml": 235, "iacute": 237, "icirc": 238,
  "igrave": 236, "iuml": 239, "ntilde": 241, "oacute": 243, "ocirc": 244,
  "ograve": 242, "oslash": 248, "otilde": 245, "ouml": 246, "szlig": 223,
  "thorn": 254, "uacute": 250, "ucirc": 251, "ugrave": 249, "uuml": 252,
  "yacute": 253, "yuml": 255,

  "copy": 169, "reg": 174, "nbsp": 160, "iexcl": 161, "cent": 162,
  "pound": 163, "curren": 164, "yen": 165, "brvbar": 166, "sect": 167,
  "uml": 168, "ordf": 170, "laquo": 171, "not": 172, "shy": 173,
  "macr": 175, "deg": 176, "plusmn": 177, "sup1": 185, "sup2": 178,
  "sup3": 179, "acute": 180, "micro": 181, "para": 182, "middot": 183,
  "cedil": 184, "ordm": 186, "raquo": 187, "frac14": 188, "frac12": 189,
  "frac34": 190, "iquest": 191, "times": 215, "divide": 247,

  "OElig": 338, "oelig": 339, "Scaron": 352, "scaron": 353, "Yuml": 376,
  "fnof": 402, "circ": 710, "tilde": 732,

  "Alpha": 913, "Beta": 914, "Gamma": 915, "Delta": 916, "Epsilon": 917,
  "Zeta": 918, "Eta": 919, "Theta": 920, "Iota": 921, "Kappa": 922,
  "Lambda": 923, "Mu": 924, "Nu": 925, "Xi": 926, "Omicron": 927,
  "Pi": 928, "Rho": 929, "Sigma": 931, "Tau": 932, "Upsilon": 933,
  "Phi": 934, "Chi": 935, "Psi": 936, "Omega": 937,

  "alpha": 945, "beta": 946, "gamma": 947, "delta": 948, "epsilon": 949,
  "zeta": 950, "eta": 951, "theta": 952, "iota": 953, "kappa": 954,
  "lambda": 955, "mu": 956, "nu": 957, "xi": 958, "omicron": 959,
  "pi": 960, "rho": 961, "sigmaf": 962, "sigma": 963, "tau": 964,
  "upsilon": 965, "phi": 966, "chi": 967, "psi": 968, "omega": 969,
  "thetasym": 977, "upsih": 978, "piv": 982,

  "ensp": 8194, "emsp": 8195, "thinsp": 8201, "zwnj": 8204, "zwj": 8205,
  "lrm": 8206, "rlm": 8207, "ndash": 8211, "mdash": 8212, "lsquo": 8216,
  "rsquo": 8217, "sbquo": 8218, "ldquo": 8220, "rdquo": 8221, "bdquo": 8222,
  "dagger": 8224, "Dagger": 8225, "bull": 8226, "hellip": 8230,
  "permil": 8240, "prime": 8242, "Prime": 8243, "lsaquo": 8249,
  "rsaquo": 8250, "oline": 8254, "frasl": 8260, "euro": 8364, "image": 8465,
  "weierp": 8472, "real": 8476, "trade": 8482, "alefsym": 8501,

  "larr": 8592, "uarr": 8593, "rarr": 8594, "darr": 8595, "harr": 8596,
  "crarr": 8629, "lArr": 8656, "uArr": 8657, "rArr": 8658, "dArr": 8659,
  "hArr": 8660,

  "forall": 8704, "part": 8706, "exist": 8707, "empty": 8709, "nabla": 8711,
  "isin": 8712, "notin": 8713, "ni": 8715, "prod": 8719, "sum": 8721,
  "minus": 8722, "lowast": 8727, "radic": 8730, "prop": 8733, "infin": 8734,
  "ang": 8736, "and": 8743, "or": 8744, "cap": 8745, "cup": 8746,
  "int": 8747, "there4": 8756, "sim": 8764, "cong": 8773, "asymp": 8776,
  "ne": 8800, "equiv": 8801, "le": 8804, "ge": 8805, "sub": 8834,
  "sup": 8835, "nsub": 8836, "sube": 8838, "supe": 8839, "oplus": 8853,
  "otimes": 8855, "perp": 8869, "sdot": 8901, "lceil": 8968, "rceil": 8969,
  "lfloor": 8970, "rfloor": 8971, "lang": 9001, "rang": 9002, "loz": 9674,
  "spades": 9824, "clubs": 9827, "hearts": 9829, "diams": 9830
};

// Replace every numeric character code with its one-character string.
Object.keys(sax.ENTITIES).forEach(function (name) {
  var value = sax.ENTITIES[name];
  if (typeof value === 'number') {
    value = String.fromCharCode(value);
  }
  sax.ENTITIES[name] = value;
});
// Add the reverse mapping (state number -> state name) to sax.STATE, then
// repurpose `S` as the shorthand for the finished table.
var S;
Object.keys(sax.STATE).forEach(function (stateName) {
  sax.STATE[sax.STATE[stateName]] = stateName;
});
S = sax.STATE;
/**
 * Call the handler stored under `event` on the parser, if one is set.
 *
 * @param {Object} parser - object holding the handler properties
 * @param {string} event - handler property name, e.g. "ontext"
 * @param {*} [data] - single argument passed to the handler
 */
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data);
  }
}
/**
 * Emit a node event, flushing any buffered text first so that the text
 * event is delivered before the node event.
 *
 * @param {SAXParser} parser
 * @param {string} nodeType - handler property name, e.g. "onopentag"
 * @param {*} [data] - payload passed to the handler
 */
function emitNode (parser, nodeType, data) {
  var hasPendingText = !!parser.textNode;
  if (hasPendingText) {
    closeText(parser);
  }
  emit(parser, nodeType, data);
}
/**
 * Apply the text options to the buffered text, emit it as "ontext" when
 * anything is left, and empty the text buffer.
 *
 * @param {SAXParser} parser
 */
function closeText (parser) {
  var processed = textopts(parser.opt, parser.textNode);
  parser.textNode = processed;
  if (processed) {
    emit(parser, "ontext", processed);
  }
  parser.textNode = "";
}
/**
 * Apply the `trim` and `normalize` options to a piece of text.
 *
 * @param {Object} opt - options; `trim` strips surrounding whitespace and
 *   `normalize` collapses each whitespace run into a single space
 * @param {string} text
 * @returns {string} the processed text
 */
function textopts (opt, text) {
  var result = opt.trim ? text.trim() : text;
  return opt.normalize ? result.replace(/\s+/g, " ") : result;
}
/**
 * Record an error on the parser and report it through "onerror".
 * Buffered text is flushed first; when position tracking is enabled the
 * current line, column and character are appended to the message.
 *
 * @param {SAXParser} parser
 * @param {string} er - error message
 * @returns {SAXParser} the parser
 */
function error (parser, er) {
  closeText(parser);

  var message = er;
  if (parser.trackPosition) {
    message += "\nLine: " + parser.line +
      "\nColumn: " + parser.column +
      "\nChar: " + parser.c;
  }

  var failure = new Error(message);
  parser.error = failure;
  emit(parser, "onerror", failure);
  return parser;
}
/**
 * Finish the document: report an unclosed root (strict mode) or an
 * unexpected end state, flush buffered text, emit "onend", and reset the
 * parser by re-running the SAXParser constructor on it.
 *
 * @param {SAXParser} parser
 * @returns {SAXParser} the parser
 */
function end (parser) {
  if (!parser.closedRoot) {
    strictFail(parser, "Unclosed root tag");
  }

  var endedCleanly = parser.state === S.BEGIN || parser.state === S.TEXT;
  if (!endedCleanly) {
    error(parser, "Unexpected end");
  }

  closeText(parser);
  parser.c = "";
  parser.closed = true;
  emit(parser, "onend");

  // Re-initialise in place with the same strictness and options.
  SAXParser.call(parser, parser.strict, parser.opt);
  return parser;
}
/**
 * Report `message` as an error, but only when the parser is in strict mode.
 *
 * @param {SAXParser} parser
 * @param {string} message
 * @throws {Error} when `parser` is not a SAXParser instance
 */
function strictFail (parser, message) {
  var isParser = typeof parser === 'object' && parser instanceof SAXParser;
  if (!isParser) {
    throw new Error('bad call to strictFail');
  }
  if (parser.strict) {
    error(parser, message);
  }
}
/**
 * Start a new tag record from the collected tag name and make it the
 * parser's current tag. In non-strict mode the name is case-normalised.
 *
 * @param {SAXParser} parser
 */
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]();
  }

  var depth = parser.tags.length;
  var parent = parser.tags[depth - 1] || parser;
  var tag = { name: parser.tagName, attributes: {} };
  parser.tag = tag;

  // Will be overridden if the tag contains an xmlns="foo" or xmlns:foo="bar".
  if (parser.opt.xmlns) {
    tag.ns = parent.ns;
  }
  parser.attribList.length = 0;
}
/**
 * Split a qualified name into prefix and local part at the first colon.
 *
 * Everything after the first colon is kept as the local part, so a name
 * that contains more than one colon is not silently truncated.
 *
 * @param {string} name - tag or attribute name
 * @param {boolean} [attribute] - truthy when `name` is an attribute name;
 *   a bare "xmlns" attribute is then reported as prefix "xmlns", local ""
 * @returns {{prefix: string, local: string}}
 */
function qname (name, attribute) {
  var i = name.indexOf(":");
  var prefix = i < 0 ? "" : name.slice(0, i);
  var local = i < 0 ? name : name.slice(i + 1);

  // <x "xmlns"="http://foo">
  if (attribute && name === "xmlns") {
    prefix = "xmlns";
    local = "";
  }

  return { prefix: prefix, local: local };
}
/**
 * Commit the attribute currently held in `attribName` / `attribValue`.
 *
 * A repeated attribute name on the same tag is dropped (first one wins),
 * both for attributes already stored on the tag and for attributes that are
 * still deferred in `attribList`. In xmlns mode, namespace bindings are
 * applied to the tag and the attribute event is deferred; otherwise the
 * attribute is stored and emitted immediately.
 *
 * @param {SAXParser} parser
 * @returns {*} an empty string when a duplicate was dropped, else undefined
 */
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]();
  }

  var attribName = parser.attribName;
  // attribList holds [name, value] pairs, so compare against the name slot.
  var isDeferred = parser.attribList.some(function (pair) {
    return pair[0] === attribName;
  });
  // Go through Object.prototype so an attribute that is itself named
  // "hasOwnProperty" cannot shadow the method.
  var isStored = Object.prototype.hasOwnProperty.call(
    parser.tag.attributes, attribName);
  if (isDeferred || isStored) {
    return parser.attribName = parser.attribValue = "";
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true);
    var prefix = qn.prefix;
    var local = qn.local;

    if (prefix === "xmlns") {
      // namespace binding attribute; push the binding into scope
      if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          "xml: prefix must be bound to " + XML_NAMESPACE + "\n" +
          "Actual: " + parser.attribValue);
      } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n" +
          "Actual: " + parser.attribValue);
      } else {
        var tag = parser.tag;
        var parent = parser.tags[parser.tags.length - 1] || parser;
        // Give the tag its own scope object the first time it adds a binding.
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns);
        }
        tag.ns[local] = parser.attribValue;
      }
    }

    // Defer onattribute events until all attributes have been seen so any
    // new bindings can take effect; preserve attribute order so deferred
    // events can be emitted in document order.
    parser.attribList.push([parser.attribName, parser.attribValue]);
  } else {
    // In non-xmlns mode the event can be emitted right away.
    parser.tag.attributes[parser.attribName] = parser.attribValue;
    emitNode(parser, "onattribute", {
      name: parser.attribName,
      value: parser.attribValue
    });
  }

  parser.attribName = parser.attribValue = "";
}
/**
 * Finish the current opening tag: resolve namespaces and emit the deferred
 * attribute events (xmlns mode only), push the tag on the stack, emit
 * "onopentag", and pick the next parser state.
 *
 * @param {SAXParser} parser
 * @param {boolean} [selfClosing] - truthy for <tag/>; the caller is then
 *   expected to close the tag, so state and current tag are left untouched
 */
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    var tag = parser.tag;

    // Add namespace info to the tag.
    var tagQName = qname(parser.tagName);
    tag.prefix = tagQName.prefix;
    tag.local = tagQName.local;
    tag.uri = tag.ns[tagQName.prefix] || "";

    if (tag.prefix && !tag.uri) {
      strictFail(parser,
        "Unbound namespace prefix: " + JSON.stringify(parser.tagName));
      tag.uri = tagQName.prefix;
    }

    // Emit namespace binding events when this tag introduced bindings.
    var parent = parser.tags[parser.tags.length - 1] || parser;
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (boundPrefix) {
        emitNode(parser, "onopennamespace", {
          prefix: boundPrefix,
          uri: tag.ns[boundPrefix]
        });
      });
    }

    // Handle deferred onattribute events.
    // Note: do not apply the default ns to attributes:
    // http://www.w3.org/TR/REC-xml-names/#defaulting
    var deferredCount = parser.attribList.length;
    for (var idx = 0; idx < deferredCount; idx++) {
      var pair = parser.attribList[idx];
      var name = pair[0];
      var value = pair[1];
      var attrQName = qname(name, true);
      var prefix = attrQName.prefix;
      var local = attrQName.local;

      var uri = "";
      if (prefix !== "") {
        uri = tag.ns[prefix] || "";
      }

      var attribute = {
        name: name,
        value: value,
        prefix: prefix,
        local: local,
        uri: uri
      };

      // Attributes with an unbound prefix fail now (strict mode).
      if (prefix && prefix !== "xmlns" && !uri) {
        strictFail(parser,
          "Unbound namespace prefix: " + JSON.stringify(prefix));
        attribute.uri = prefix;
      }

      parser.tag.attributes[name] = attribute;
      emitNode(parser, "onattribute", attribute);
    }
    parser.attribList.length = 0;
  }

  parser.tag.isSelfClosing = !!selfClosing;

  // Process the tag.
  parser.sawRoot = true;
  parser.tags.push(parser.tag);
  emitNode(parser, "onopentag", parser.tag);

  if (!selfClosing) {
    // Special case for <script> in non-strict mode.
    var entersScript = !parser.noscript &&
      parser.tagName.toLowerCase() === "script";
    parser.state = entersScript ? S.SCRIPT : S.TEXT;
    parser.tag = null;
    parser.tagName = "";
  }

  parser.attribValue = "";
  parser.attribName = "";
  parser.attribList.length = 0;
}
/**
 * Handle a closing tag whose name is in `parser.tagName`.
 *
 * Closes every open tag down to and including the nearest one with a
 * matching name, emitting "onclosetag" (and "onclosenamespace" in xmlns
 * mode) for each. An empty or unmatched closing tag is reported in strict
 * mode and otherwise appended to the text buffer. While script content is
 * buffered, only a closing `script` tag (in any letter case, matching the
 * case-insensitive check used when the script was opened) ends the script;
 * any other closing tag is appended to the script text.
 *
 * @param {SAXParser} parser
 */
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, "Weird empty close tag.");
    parser.textNode += "</>";
    parser.state = S.TEXT;
    return;
  }

  if (parser.script) {
    if (parser.tagName.toLowerCase() !== "script") {
      parser.script += "</" + parser.tagName + ">";
      parser.tagName = "";
      parser.state = S.SCRIPT;
      return;
    }
    emitNode(parser, "onscript", parser.script);
    parser.script = "";
  }

  // First make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length;
  var tagName = parser.tagName;
  if (!parser.strict) {
    tagName = tagName[parser.looseCase]();
  }
  var closeTo = tagName;
  while (t--) {
    var close = parser.tags[t];
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, "Unexpected close tag");
    } else {
      break;
    }
  }

  // Didn't find it. We already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, "Unmatched closing tag: " + parser.tagName);
    parser.textNode += "</" + parser.tagName + ">";
    parser.state = S.TEXT;
    return;
  }

  parser.tagName = tagName;
  var s = parser.tags.length;
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop();
    parser.tagName = parser.tag.name;
    emitNode(parser, "onclosetag", parser.tagName);

    var parent = parser.tags[parser.tags.length - 1] || parser;
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // Announce the end of the namespace bindings introduced by this tag.
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p];
        emitNode(parser, "onclosenamespace", { prefix: p, uri: n });
      });
    }
  }

  if (t === 0) {
    parser.closedRoot = true;
  }
  parser.tagName = parser.attribValue = parser.attribName = "";
  parser.attribList.length = 0;
  parser.state = S.TEXT;
}
/**
 * Resolve the entity name held in `parser.entity` to its replacement text.
 *
 * Named entities are looked up in `parser.ENTITIES`, first as written and
 * then lower-cased; only non-empty string values count, so names inherited
 * from Object.prototype (such as "constructor") are not mistaken for
 * entities. Numeric references ("#65", "#x41") must be well-formed and lie
 * in the range 1..0x10FFFF; code points above 0xFFFF are returned as a
 * UTF-16 surrogate pair. Anything else is reported through strictFail and
 * returned unchanged as "&name;".
 *
 * @param {SAXParser} parser
 * @returns {string} replacement text, or the original reference when it
 *   cannot be resolved
 */
function parseEntity (parser) {
  var entity = parser.entity;
  var entityLC = entity.toLowerCase();

  var named = parser.ENTITIES[entity];
  if (named && typeof named === "string") {
    return named;
  }
  named = parser.ENTITIES[entityLC];
  if (named && typeof named === "string") {
    return named;
  }

  var num;
  var valid = false;
  if (entityLC.charAt(0) === "#") {
    var isHex = entityLC.charAt(1) === "x";
    var radix = isHex ? 16 : 10;
    var digits = entityLC.slice(isHex ? 2 : 1);
    num = parseInt(digits, radix);
    // Round-trip the number to reject trailing garbage ("12ab" in decimal),
    // ignoring leading zeros. The range test is false for NaN as well.
    valid = num >= 1 && num <= 0x10FFFF &&
      num.toString(radix) === digits.replace(/^0+/, "");
  }

  if (!valid) {
    strictFail(parser, "Invalid character entity");
    return "&" + parser.entity + ";";
  }

  if (num <= 0xFFFF) {
    return String.fromCharCode(num);
  }
  // Encode an astral code point as a UTF-16 surrogate pair.
  var offset = num - 0x10000;
  return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
}
- function write (chunk) {
- var parser = this
- if (this.error) throw this.error
- if (parser.closed) return error(parser,
- "Cannot write after close. Assign an onready handler.")
- if (chunk === null) return end(parser)
- var i = 0, c = ""
- while (parser.c = c = chunk.charAt(i++)) {
- if (parser.trackPosition) {
- parser.position ++
- if (c === "\n") {
- parser.line ++
- parser.column = 0
- } else parser.column ++
- }
- switch (parser.state) {
- case S.BEGIN:
- if (c === "<") {
- parser.state = S.OPEN_WAKA
- parser.startTagPosition = parser.position
- } else if (not(whitespace,c)) {
- // have to process this as a text node.
- // weird, but happens.
- strictFail(parser, "Non-whitespace before first tag.")
- parser.textNode = c
- parser.state = S.TEXT
- }
- continue
- case S.TEXT:
- if (parser.sawRoot && !parser.closedRoot) {
- var starti = i-1
- while (c && c!=="<" && c!=="&") {
- c = chunk.charAt(i++)
- if (c && parser.trackPosition) {
- parser.position ++
- if (c === "\n") {
- parser.line ++
- parser.column = 0
- } else parser.column ++
- }
- }
- parser.textNode += chunk.substring(starti, i-1)
- }
- if (c === "<") {
- parser.state = S.OPEN_WAKA
- parser.startTagPosition = parser.position
- } else {
- if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
- strictFail(parser, "Text data outside of root node.")
- if (c === "&") parser.state = S.TEXT_ENTITY
- else parser.textNode += c
- }
- continue
- case S.SCRIPT:
- // only non-strict
- if (c === "<") {
- parser.state = S.SCRIPT_ENDING
- } else parser.script += c
- continue
- case S.SCRIPT_ENDING:
- if (c === "/") {
- parser.state = S.CLOSE_TAG
- } else {
- parser.script += "<" + c
- parser.state = S.SCRIPT
- }
- continue
- case S.OPEN_WAKA:
- // either a /, ?, !, or text is coming next.
- if (c === "!") {
- parser.state = S.SGML_DECL
- parser.sgmlDecl = ""
- } else if (is(whitespace, c)) {
- // wait for it...
- } else if (is(nameStart,c)) {
- parser.state = S.OPEN_TAG
- parser.tagName = c
- } else if (c === "/") {
- parser.state = S.CLOSE_TAG
- parser.tagName = ""
- } else if (c === "?") {
- parser.state = S.PROC_INST
- parser.procInstName = parser.procInstBody = ""
- } else {
- strictFail(parser, "Unencoded <")
- // if there was some whitespace, then add that in.
- if (parser.startTagPosition + 1 < parser.position) {
- var pad = parser.position - parser.startTagPosition
- c = new Array(pad).join(" ") + c
- }
- parser.textNode += "<" + c
- parser.state = S.TEXT
- }
- continue
- case S.SGML_DECL:
- if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
- emitNode(parser, "onopencdata")
- parser.state = S.CDATA
- parser.sgmlDecl = ""
- parser.cdata = ""
- } else if (parser.sgmlDecl+c === "--") {
- parser.state = S.COMMENT
- parser.comment = ""
- parser.sgmlDecl = ""
- } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
- parser.state = S.DOCTYPE
- if (parser.doctype || parser.sawRoot) strictFail(parser,
- "Inappropriately located doctype declaration")
- parser.doctype = ""
- parser.sgmlDecl = ""
- } else if (c === ">") {
- emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
- parser.sgmlDecl = ""
- parser.state = S.TEXT
- } else if (is(quote, c)) {
- parser.state = S.SGML_DECL_QUOTED
- parser.sgmlDecl += c
- } else parser.sgmlDecl += c
- continue
- case S.SGML_DECL_QUOTED:
- if (c === parser.q) {
- parser.state = S.SGML_DECL
- parser.q = ""
- }
- parser.sgmlDecl += c
- continue
- case S.DOCTYPE:
- if (c === ">") {
- parser.state = S.TEXT
- emitNode(parser, "ondoctype", parser.doctype)
- parser.doctype = true // just remember that we saw it.
- } else {
- parser.doctype += c
- if (c === "[") parser.state = S.DOCTYPE_DTD
- else if (is(quote, c)) {
- parser.state = S.DOCTYPE_QUOTED
- parser.q = c
- }
- }
- continue
- case S.DOCTYPE_QUOTED:
- parser.doctype += c
- if (c === parser.q) {
- parser.q = ""
- parser.state = S.DOCTYPE
- }
- continue
- case S.DOCTYPE_DTD:
- parser.doctype += c
- if (c === "]") parser.state = S.DOCTYPE
- else if (is(quote,c)) {
- parser.state = S.DOCTYPE_DTD_QUOTED
- parser.q = c
- }
- continue
- case S.DOCTYPE_DTD_QUOTED:
- parser.doctype += c
- if (c === parser.q) {
- parser.state = S.DOCTYPE_DTD
- parser.q = ""
- }
- continue
- case S.COMMENT:
- if (c === "-") parser.state = S.COMMENT_ENDING
- else parser.comment += c
- continue
- case S.COMMENT_ENDING:
- if (c === "-") {
- parser.state = S.COMMENT_ENDED
- parser.comment = textopts(parser.opt, parser.comment)
- if (parser.comment) emitNode(parser, "oncomment", parser.comment)
- parser.comment = ""
- } else {
- parser.comment += "-" + c
- parser.state = S.COMMENT
- }
- continue
- case S.COMMENT_ENDED:
- if (c !== ">") {
- strictFail(parser, "Malformed comment")
- // allow <!-- blah -- bloo --> in non-strict mode,
- // which is a comment of " blah -- bloo "
- parser.comment += "--" + c
- parser.state = S.COMMENT
- } else parser.state = S.TEXT
- continue
- case S.CDATA:
- if (c === "]") parser.state = S.CDATA_ENDING
- else parser.cdata += c
- continue
- case S.CDATA_ENDING:
- if (c === "]") parser.state = S.CDATA_ENDING_2
- else {
- parser.cdata += "]" + c
- parser.state = S.CDATA
- }
- continue
- case S.CDATA_ENDING_2:
- if (c === ">") {
- if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
- emitNode(parser, "onclosecdata")
- parser.cdata = ""
- parser.state = S.TEXT
- } else if (c === "]") {
- parser.cdata += "]"
- } else {
- parser.cdata += "]]" + c
- parser.state = S.CDATA
- }
- continue
- case S.PROC_INST:
- if (c === "?") parser.state = S.PROC_INST_ENDING
- else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
- else parser.procInstName += c
- continue
- case S.PROC_INST_BODY:
- if (!parser.procInstBody && is(whitespace, c)) continue
- else if (c === "?") parser.state = S.PROC_INST_ENDING
- else parser.procInstBody += c
- continue
- case S.PROC_INST_ENDING:
- if (c === ">") {
- emitNode(parser, "onprocessinginstruction", {
- name : parser.procInstName,
- body : parser.procInstBody
- })
- parser.procInstName = parser.procInstBody = ""
- parser.state = S.TEXT
- } else {
- parser.procInstBody += "?" + c
- parser.state = S.PROC_INST_BODY
- }
- continue
- case S.OPEN_TAG:
- if (is(nameBody, c)) parser.tagName += c
- else {
- newTag(parser)
- if (c === ">") openTag(parser)
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
- else {
- if (not(whitespace, c)) strictFail(
- parser, "Invalid character in tag name")
- parser.state = S.ATTRIB
- }
- }
- continue
- case S.OPEN_TAG_SLASH:
- if (c === ">") {
- openTag(parser, true)
- closeTag(parser)
- } else {
- strictFail(parser, "Forward-slash in opening tag not followed by >")
- parser.state = S.ATTRIB
- }
- continue
- case S.ATTRIB:
- // haven't read the attribute name yet.
- if (is(whitespace, c)) continue
- else if (c === ">") openTag(parser)
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
- else if (is(nameStart, c)) {
- parser.attribName = c
- parser.attribValue = ""
- parser.state = S.ATTRIB_NAME
- } else strictFail(parser, "Invalid attribute name")
- continue
- case S.ATTRIB_NAME:
- if (c === "=") parser.state = S.ATTRIB_VALUE
- else if (c === ">") {
- strictFail(parser, "Attribute without value")
- parser.attribValue = parser.attribName
- attrib(parser)
- openTag(parser)
- }
- else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
- else if (is(nameBody, c)) parser.attribName += c
- else strictFail(parser, "Invalid attribute name")
- continue
- case S.ATTRIB_NAME_SAW_WHITE:
- if (c === "=") parser.state = S.ATTRIB_VALUE
- else if (is(whitespace, c)) continue
- else {
- strictFail(parser, "Attribute without value")
- parser.tag.attributes[parser.attribName] = ""
- parser.attribValue = ""
- emitNode(parser, "onattribute",
- { name : parser.attribName, value : "" })
- parser.attribName = ""
- if (c === ">") openTag(parser)
- else if (is(nameStart, c)) {
- parser.attribName = c
- parser.state = S.ATTRIB_NAME
- } else {
- strictFail(parser, "Invalid attribute name")
- parser.state = S.ATTRIB
- }
- }
- continue
- case S.ATTRIB_VALUE:
- if (is(whitespace, c)) continue
- else if (is(quote, c)) {
- parser.q = c
- parser.state = S.ATTRIB_VALUE_QUOTED
- } else {
- strictFail(parser, "Unquoted attribute value")
- parser.state = S.ATTRIB_VALUE_UNQUOTED
- parser.attribValue = c
- }
- continue
- case S.ATTRIB_VALUE_QUOTED:
- if (c !== parser.q) {
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
- else parser.attribValue += c
- continue
- }
- attrib(parser)
- parser.q = ""
- parser.state = S.ATTRIB_VALUE_CLOSED
- continue
- case S.ATTRIB_VALUE_CLOSED:
- if (is(whitespace, c)) {
- parser.state = S.ATTRIB
- } else if (c === ">") openTag(parser)
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
- else if (is(nameStart, c)) {
- strictFail(parser, "No whitespace between attributes")
- parser.attribName = c
- parser.attribValue = ""
- parser.state = S.ATTRIB_NAME
- } else strictFail(parser, "Invalid attribute name")
- continue
- case S.ATTRIB_VALUE_UNQUOTED:
- if (not(attribEnd,c)) {
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
- else parser.attribValue += c
- continue
- }
- attrib(parser)
- if (c === ">") openTag(parser)
- else parser.state = S.ATTRIB
- continue
- case S.CLOSE_TAG:
- if (!parser.tagName) {
- if (is(whitespace, c)) continue
- else if (not(nameStart, c)) {
- if (parser.script) {
- parser.script += "</" + c
- parser.state = S.SCRIPT
- } else {
- strictFail(parser, "Invalid tagname in closing tag.")
- }
- } else parser.tagName = c
- }
- else if (c === ">") closeTag(parser)
- else if (is(nameBody, c)) parser.tagName += c
- else if (parser.script) {
- parser.script += "</" + parser.tagName
- parser.tagName = ""
- parser.state = S.SCRIPT
- } else {
- if (not(whitespace, c)) strictFail(parser,
- "Invalid tagname in closing tag")
- parser.state = S.CLOSE_TAG_SAW_WHITE
- }
- continue
- case S.CLOSE_TAG_SAW_WHITE:
- if (is(whitespace, c)) continue
- if (c === ">") closeTag(parser)
- else strictFail(parser, "Invalid characters in closing tag")
- continue
- case S.TEXT_ENTITY:
- case S.ATTRIB_VALUE_ENTITY_Q:
- case S.ATTRIB_VALUE_ENTITY_U:
- switch(parser.state) {
- case S.TEXT_ENTITY:
- var returnState = S.TEXT, buffer = "textNode"
- break
- case S.ATTRIB_VALUE_ENTITY_Q:
- var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
- break
- case S.ATTRIB_VALUE_ENTITY_U:
- var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
- break
- }
- if (c === ";") {
- parser[buffer] += parseEntity(parser)
- parser.entity = ""
- parser.state = returnState
- }
- else if (is(entity, c)) parser.entity += c
- else {
- strictFail(parser, "Invalid character entity")
- parser[buffer] += "&" + parser.entity + c
- parser.entity = ""
- parser.state = returnState
- }
- continue
- default:
- throw new Error(parser, "Unknown state: " + parser.state)
- }
- } // while
- // cdata blocks can get very big under normal conditions. emit and move on.
- // if (parser.state === S.CDATA && parser.cdata) {
- // emitNode(parser, "oncdata", parser.cdata)
- // parser.cdata = ""
- // }
- if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
- return parser
- }
- })(typeof exports === "undefined" ? sax = {} : exports)
|