Syntax and Lexical Rules for Extensible Markup Language

// Copyright (c) 2008-2010 by Kavanagh Consultancy Limited. All rights reserved.

// This pattern file defines the syntax rules and lexical structure of
// <a href="http://www.w3.org/TR/2006/REC-xml-20060816">Extensible Markup Language (XML) 1.0 (Fourth Edition)</a>
// including errata as of 18 January 2008 (from proposed Fifth Edition)

ExtensibleMarkupLanguage : // Syntax Rules

// First rule of the syntax section (presumably the start symbol); it simply delegates to document.
extensibleMarkup := document

// Document
// [1] document ::= prolog element Misc*

// A document is a prolog, then exactly one element, then any number of Misc items.
document := prolog element { Misc }*

// Character Range
// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
//              /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */

// (Lexical Rules)
// Char := '\t' | '\n' | '\r' | 0x20..0xD7FF | 0xE000..0xFFFD | 0x10000..0x10FFFF

// White Space
// [3] S ::= (#x20 | #x9 | #xD | #xA)+

// (Lexical Rules)
// S ::= { ' ' | '\t' | '\r' | '\n' }+

// Names and Tokens
// [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D]
//                        | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF]
//                        | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
// [4a] NameChar      ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
// [5]  Name          ::= NameStartChar (NameChar)*
// [6]  Names         ::= Name (#x20 Name)*
// [7]  Nmtoken       ::= (NameChar)+
// [8]  Nmtokens      ::= Nmtoken (#x20 Nmtoken)*

// (Lexical Rules)
// NameStartChar  := ':' | 'A'..'Z' | '_' | 'a'..'z' | 0xC0..0xD6 | 0xD8..0xF6 | 0xF8..0x2FF | 0x370..0x37D
//                   | 0x37F..0x1FFF | 0x200C..0x200D | 0x2070..0x218F | 0x2C00..0x2FEF
//                   | 0x3001..0xD7FF | 0xF900..0xFDCF | 0xFDF0..0xFFFD | 0x10000..0xEFFFF
// NameChar       := NameStartChar | '-' | '.' | '0'..'9' | 0xB7 | 0x0300..0x036F | 0x203F..0x2040
// Name          ::= NameStartChar { NameChar }*
// Names         ::= Name { ' ' Name }*
// Nmtoken       ::= { NameChar }+
// Nmtokens      ::= Nmtoken { ' ' Nmtoken }*

// Literals
// [9]  EntityValue   ::= '"' ([^%&"] | PEReference | Reference)* '"'
//                      | "'" ([^%&'] | PEReference | Reference)* "'"
// [10] AttValue      ::= '"' ([^<&"] | Reference)* '"'
//                      | "'" ([^<&'] | Reference)* "'"
// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
// [12] PubidLiteral  ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
// [13] PubidChar     ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

// (Lexical Rules)
// EntityValue       ::= '"' { ! '%' ! '&' ! '"' Char | PEReference | Reference }* '"'
//                     | "'" { ! '%' ! '&' ! "'" Char | PEReference | Reference }* "'"
// AttValue           := AttValueQuot_ { AttValueQuotData_ | Reference }* AttValueQuot_
//                     | AttValueApos_ { AttValueAposData_ | Reference }* AttValueApos_
// AttValueQuot_     ::= '"'
// AttValueQuotData_ ::= { ! '<' ! '&' ! '"' Char }+
// AttValueApos_     ::= "'"
// AttValueAposData_ ::= { ! '<' ! '&' ! "'" Char }+
// SystemLiteral     ::= '"' { ! '"' Char }* '"' | "'" { ! "'" Char }* "'"
// PubidLiteral      ::= '"' { PubidChar }* '"' | "'" { ! "'" PubidChar }* "'"
// PubidChar          := ' ' | '\r' | '\n' | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | "'" | '(' | ')' | '+'
//                       | ',' | '.' | '/' | ':' | '=' | '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%'

// Character Data
// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

// (Lexical Rules)
// CharData ::= { ! '<' ! '&' ! SectEnd_ Char }*

// Comments
// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'

// (Lexical Rules)
// Comment ::= "<!--" { ! "--" Char }* "-->"

// Processing Instructions
// [16] PI       ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))

// (Lexical Rules since we need to parse PIData_ as a token)
// PI        := PIStart_ PITarget [ S PIData_ ] PIEnd_
// PITarget  := ! ( ( 'X' | 'x' ) ( 'M' | 'm' ) ( 'L' | 'l' ) ! NameChar ) Name
// PIData_  ::= { ! PIEnd_ Char }*

// CDATA Sections
// [18] CDSect  ::= CDStart CData CDEnd
// [19] CDStart ::= '<![CDATA['
// [20] CData   ::= (Char* - (Char* ']]>' Char*))
// [21] CDEnd   ::= ']]>'

// (Lexical Rules since we need to parse CData as a token)
// CDSect   := CDStart CData CDEnd
// CDStart  := SectStart_ CDATA_ SectBegin_
// CData   ::= { ! SectEnd_ Char }*
// CDEnd    := SectEnd_

// Prolog
// [22] prolog      ::= XMLDecl? Misc* (doctypedecl Misc*)?
// [23] XMLDecl     ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
// [25] Eq          ::= S? '=' S?
// [26] VersionNum  ::= '1.' [0-9]+
// [27] Misc        ::= Comment | PI | S

// prolog:      optional XML declaration, any number of Misc items, then optionally a
//              document type declaration followed by more Misc items.
// XMLDecl:     "<?" xml_, mandatory version information, optional encoding and standalone
//              declarations, optional white space, "?>".
// VersionInfo: the leading white space is mandatory, as production [24] requires and in
//              line with EncodingDecl and SDDecl, which also begin with a required S.
// Misc:        a comment, a processing instruction or white space.
prolog      := [ XMLDecl ] { Misc }* [ doctypedecl { Misc }* ]
XMLDecl     := "<?" xml_ VersionInfo [ EncodingDecl ] [ SDDecl ] [ S ] "?>"
VersionInfo := S version_ Eq VersionLiteral_
Misc        := Comment | PI | S

// (Lexical Rules)
// Eq               := [ S ] EqOperator_ [ S ]
// VersionLiteral_ ::= "'" VersionNum "'" | '"' VersionNum '"'
// VersionNum       := "1." { '0'..'9' }+

// Document Type Definition
// [28]  doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
//                       ('[' intSubset ']' S?)? '>'            [VC: Root Element Type]
//                                                              [WFC: External Subset]
// [28a] DeclSep     ::= PEReference | S                        [WFC: PE Between Declarations]
// [28b] intSubset   ::= (markupdecl | DeclSep)*
// [29]  markupdecl  ::= elementdecl | AttlistDecl | EntityDecl
//                       | NotationDecl | PI | Comment          [VC: Proper Declaration/PE Nesting]
//                                                              [WFC: PEs in Internal Subset]

// doctypedecl: "<!" DOCTYPE_, white space, a Name, an optional external identifier, and an
//              optional internal subset enclosed in '[' ... ']', closed by '>'.
// DeclSep:     what may separate declarations: a parameter-entity reference or white space.
// intSubset:   any sequence of markup declarations and declaration separators (may be empty).
// markupdecl:  one of the four declaration kinds, a processing instruction or a comment.
doctypedecl := "<!" DOCTYPE_ S Name [ S ExternalID ] [ S ] [ '[' intSubset ']' [ S ] ] '>'
DeclSep     := PEReference | S
intSubset   := { markupdecl | DeclSep }*
markupdecl  := elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment

// External Subset
// [30] extSubset     ::= TextDecl? extSubsetDecl
// [31] extSubsetDecl ::= (markupdecl | conditionalSect | DeclSep)*

// extSubset:     an optional text declaration followed by the external subset declarations.
// extSubsetDecl: like intSubset, but conditional sections are also allowed (may be empty).
extSubset     := [ TextDecl ] extSubsetDecl
extSubsetDecl := { markupdecl | conditionalSect | DeclSep }*

// Standalone Document Declaration
// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
//                                  | ('"' ('yes' | 'no') '"')) [VC: Standalone Document Declaration]

// Standalone declaration: mandatory white space, the standalone_ keyword, Eq, then the
// quoted "yes"/"no" value, which is matched as the single token StandaloneLiteral_.
SDDecl := S standalone_ Eq StandaloneLiteral_

// (Lexical Rules)
// StandaloneLiteral_ ::= "'" ( "yes" | "no" ) "'" | '"' ( "yes" | "no" ) '"'

// (Productions 33 through 38 have been removed.)

// Element
// [39] element ::= EmptyElemTag
//                  | STag content ETag [WFC: Element Type Match]
//                                      [VC: Element Valid]

// (Lexical Rules since we need to parse CharData as a token)
// element := EmptyElemTag | STag content ETag

// Start-tag
// [40] STag      ::= '<' Name (S Attribute)* S? '>' [WFC: Unique Att Spec]
// [41] Attribute ::= Name Eq AttValue               [VC: Attribute Value Type]
//                                                   [WFC: No External Entity References]
//                                                   [WFC: No < in Attribute Values]

// (Lexical Rules since we need to parse CharData as a token)
// STag      := TagStart_ Name { S Attribute }* [ S ] TagEnd_
// Attribute := Name Eq AttValue

// End-tag
// [42] ETag ::= '</' Name S? '>'

// (Lexical Rules since we need to parse CharData as a token)
// ETag := ETagStart_ Name [ S ] TagEnd_

// Content of Elements
// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*

// (Lexical Rules since we need to parse CharData as a token)
// content := [ CharData ] { ( element | Reference | CDSect | PI | Comment ) [ CharData ] }*

// Tags for Empty Elements
// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]

// (Lexical Rules since we need to parse CharData as a token)
// EmptyElemTag := TagStart_ Name { S Attribute }* [ S ] EmptyElemTagEnd_

// Element Type Declaration
// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' [VC: Unique Element Type Declaration]
// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children

// elementdecl: "<!" ELEMENT_, the element Name and its content specification, closed by '>'.
// contentspec: the EMPTY_ or ANY_ keyword, a mixed-content model, or an element-content model.
elementdecl := "<!" ELEMENT_ S Name S contentspec [ S ] '>'
contentspec := EMPTY_ | ANY_ | Mixed | children

// Element-content Models
// [47] children ::= (choice | seq) ('?' | '*' | '+')?
// [48] cp       ::= (Name | choice | seq) ('?' | '*' | '+')?
// [49] choice   ::= '(' S? cp ( S? '|' S? cp )+ S? ')'       [VC: Proper Group/PE Nesting]
// [50] seq      ::= '(' S? cp ( S? ',' S? cp )* S? ')'       [VC: Proper Group/PE Nesting]

// children: a choice or sequence group with an optional occurrence indicator.
// cp:       a content particle - a Name or a nested group - with an optional occurrence indicator.
// choice:   two or more particles separated by '|' (the repetition is '+').
// seq:      one or more particles separated by ',' (the repetition is '*').
// In choice and seq the optional white space is written after each cp inside the repetition,
// not before the separator as in productions [49]/[50]; the accepted strings are the same.
children := ( choice | seq ) [ '?' | '*' | '+' ]
cp       := ( Name | choice | seq ) [ '?' | '*' | '+' ]
choice   := '(' [ S ] cp [ S ] { '|' [ S ] cp [ S ] }+ ')'
seq      := '(' [ S ] cp [ S ] { ',' [ S ] cp [ S ] }* ')'

// Mixed-content Declaration
// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
//                | '(' S? '#PCDATA' S? ')'                  [VC: Proper Group/PE Nesting]
//                                                           [VC: No Duplicate Types]

// Mixed content: '(' '#' PCDATA_ followed either by zero or more "| Name" alternatives
// closed with ')' '*', or by a plain ')'.
// The ')' '*' alternative is listed first: if alternatives are tried in order, testing the
// bare ')' first would accept the ')' of "(#PCDATA)*" and leave the '*' unconsumed, so that
// valid declaration would be rejected. With this order "(#PCDATA)" still matches through
// the second alternative once the first one fails on the missing '*'.
Mixed := '(' [ S ] '#' PCDATA_ [ S ] ( { '|' [ S ] Name [ S ] }* ')' '*'
                                     | ')' )

// Attribute-list Declaration
// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
// [53] AttDef      ::= S Name S AttType S DefaultDecl

// AttlistDecl: "<!" ATTLIST_, the element Name, then any number of attribute definitions.
// AttDef:      one attribute definition - name, type and default declaration - each
//              preceded by mandatory white space.
AttlistDecl := "<!" ATTLIST_ S Name { AttDef }* [ S ] '>'
AttDef      := S Name S AttType S DefaultDecl

// Attribute Types
// [54] AttType       ::= StringType | TokenizedType | EnumeratedType
// [55] StringType    ::= 'CDATA'
// [56] TokenizedType ::= 'ID'         [VC: ID]
//                                     [VC: One ID per Element Type]
//                                     [VC: ID Attribute Default]
//                        | 'IDREF'    [VC: IDREF]
//                        | 'IDREFS'   [VC: IDREF]
//                        | 'ENTITY'   [VC: Entity Name]
//                        | 'ENTITIES' [VC: Entity Name]
//                        | 'NMTOKEN'  [VC: Name Token]
//                        | 'NMTOKENS' [VC: Name Token]

// AttType:       a string type, a tokenized type or an enumerated type.
// StringType:    the CDATA_ keyword.
// TokenizedType: one of the seven tokenized-type keywords. Every keyword token ends with
//                "! NameChar", so ID_ cannot match the beginning of IDREF or IDREFS.
AttType       := StringType | TokenizedType | EnumeratedType
StringType    := CDATA_
TokenizedType := ID_ | IDREF_ | IDREFS_ | ENTITY_ | ENTITIES_ | NMTOKEN_ | NMTOKENS_

// Enumerated Attribute Types
// [57] EnumeratedType ::= NotationType | Enumeration
// [58] NotationType   ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' [VC: Notation Attributes]
//                                                                           [VC: One Notation Per Element Type]
//                                                                           [VC: No Notation on Empty Element]
//                                                                           [VC: No Duplicate Tokens]
// [59] Enumeration    ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'        [VC: Enumeration]
//                                                                           [VC: No Duplicate Tokens]

// EnumeratedType: a notation type or a plain enumeration.
// NotationType:   NOTATION_ followed by a parenthesised, '|'-separated list of one or more Names.
// Enumeration:    a parenthesised, '|'-separated list of one or more Nmtokens.
// The optional white space is written after each list item instead of before each '|' as in
// productions [58]/[59]; the accepted strings are the same.
EnumeratedType := NotationType | Enumeration
NotationType   := NOTATION_ S '(' [ S ] Name [ S ] { '|' [ S ] Name [ S ] }* ')'
Enumeration    := '(' [ S ] Nmtoken [ S ] { '|' [ S ] Nmtoken [ S ] }* ')'

// Attribute Defaults
// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
//                      | (('#FIXED' S)? AttValue) [VC: Required Attribute]
//                                                 [VC: Attribute Default Value Syntactically Correct]
//                                                 [WFC: No < in Attribute Values]
//                                                 [VC: Fixed Attribute Default]
//                                                 [WFC: No External Entity References]

// Attribute default: "#REQUIRED", "#IMPLIED", or an attribute value optionally preceded by
// "#FIXED" and white space. The '#' is matched separately from the keyword token.
DefaultDecl := '#' REQUIRED_ | '#' IMPLIED_ | ( [ '#' FIXED_ S ] AttValue )

// Conditional Section
// [61] conditionalSect    ::= includeSect | ignoreSect
// [62] includeSect        ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'      [VC: Proper Conditional Section
//                                                                                     /PE Nesting]
// [63] ignoreSect         ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' [VC: Proper Conditional Section
//                                                                                     /PE Nesting]
// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
// [65] Ignore             ::= Char* - (Char* ('<![' | ']]>') Char*)

// (Lexical Rules since we need to parse Ignore as a token)
// conditionalSect     := includeSect | ignoreSect
// includeSect         := SectStart_ [ S ] INCLUDE_ [ S ] SectBegin_ extSubsetDecl SectEnd_
// ignoreSect          := SectStart_ [ S ] IGNORE_ [ S ] SectBegin_ { ignoreSectContents }* SectEnd_
// ignoreSectContents  := Ignore { SectStart_ ignoreSectContents SectEnd_ Ignore }*
// Ignore             ::= { ! SectStart_ ! SectEnd_ Char }*

// Character Reference
// [66] CharRef ::= '&#' [0-9]+ ';'
//                  | '&#x' [0-9a-fA-F]+ ';' [WFC: Legal Character]

// (Lexical Rules)
// CharRef ::= "&#" { '0'..'9' }+ ';' | "&#x" { '0'..'9' | 'a'..'f' | 'A'..'F' }+ ';'

// Entity Reference
// [67] Reference   ::= EntityRef | CharRef
// [68] EntityRef   ::= '&' Name ';'        [WFC: Entity Declared]
//                                          [VC: Entity Declared]
//                                          [WFC: Parsed Entity]
//                                          [WFC: No Recursion]
// [69] PEReference ::= '%' Name ';'        [VC: Entity Declared]
//                                          [WFC: No Recursion]
//                                          [WFC: In DTD]

// (Lexical Rules)
// Reference    := EntityRef | CharRef
// EntityRef   ::= '&' Name ';'
// PEReference ::= '%' Name ';'

// Entity Declaration
// [70] EntityDecl ::= GEDecl | PEDecl
// [71] GEDecl     ::= '<!ENTITY' S Name S EntityDef S? '>'
// [72] PEDecl     ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
// [73] EntityDef  ::= EntityValue | (ExternalID NDataDecl?)
// [74] PEDef      ::= EntityValue | ExternalID

// EntityDecl: a GEDecl or a PEDecl.
// GEDecl:     "<!" ENTITY_, a Name and an EntityDef.
// PEDecl:     differs from GEDecl by the '%' S placed before the Name and by using PEDef.
// EntityDef:  an entity value, or an external identifier optionally followed by NDataDecl.
// PEDef:      an entity value or an external identifier (no NDataDecl allowed).
EntityDecl := GEDecl | PEDecl
GEDecl     := "<!" ENTITY_ S Name S EntityDef [ S ] '>'
PEDecl     := "<!" ENTITY_ S '%' S Name S PEDef [ S ] '>'
EntityDef  := EntityValue | ExternalID [ NDataDecl ]
PEDef      := EntityValue | ExternalID

// External Entity Declaration
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral
//                     | 'PUBLIC' S PubidLiteral S SystemLiteral
// [76] NDataDecl  ::= S 'NDATA' S Name                          [VC: Notation Declared]

// ExternalID: SYSTEM_ with a system literal, or PUBLIC_ with a public-identifier literal
//             followed by a system literal.
// NDataDecl:  white space, NDATA_, white space and a Name.
ExternalID := SYSTEM_ S SystemLiteral | PUBLIC_ S PubidLiteral S SystemLiteral
NDataDecl  := S NDATA_ S Name

// Text Declaration
// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'

// Text declaration: like XMLDecl, but the version information is optional, the encoding
// declaration is mandatory and no standalone declaration is allowed.
TextDecl := "<?" xml_ [ VersionInfo ] EncodingDecl [ S ] "?>"

// Well-Formed External Parsed Entity
// [78] extParsedEnt ::= TextDecl? content

// External parsed entity: an optional text declaration followed by element content.
extParsedEnt := [ TextDecl ] content

// Encoding Declaration
// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
// [81] EncName      ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
//                       /* Encoding name contains only Latin characters */

// Encoding declaration: mandatory white space, the encoding_ keyword, Eq, then the quoted
// encoding name, which is matched as the single token EncodingLiteral_.
EncodingDecl := S encoding_ Eq EncodingLiteral_

// (Lexical Rules)
// EncodingLiteral_ ::= '"' EncName '"' | "'" EncName "'"
// EncName           := ( 'A'..'Z' | 'a'..'z' ) { 'A'..'Z' | 'a'..'z' | '0'..'9' | '.' | '_' | '-' }*

// Notation Declarations
// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' [VC: Unique Notation Name]
// [83] PublicID     ::= 'PUBLIC' S PubidLiteral

// NotationDecl: "<!" NOTATION_, a Name, then an external or a public identifier.
//               ExternalID is listed before PublicID; its PUBLIC_ form starts the same way
//               as PublicID but also requires a trailing system literal.
// PublicID:     PUBLIC_ with a public-identifier literal only.
NotationDecl := "<!" NOTATION_ S Name S ( ExternalID | PublicID ) [ S ] '>'
PublicID     := PUBLIC_ S PubidLiteral

// (Productions 84 through 89 have been removed.)

ExtensibleMarkupLanguage :: // Lexical Rules

// Top-level lexical rule: the input is any sequence of the alternatives below.
// PI is listed ahead of SeparatorOrOperator_ (which would otherwise consume "<?" on its own)
// so that a processing instruction outside an element has its data recognised as a single
// PIData_ token, as note (4) requires. An XML or text declaration is not matched by PI,
// because PITarget rejects the name "xml"; it is tokenized through "<?", Nmtoken, S, '=',
// SystemLiteral and "?>".
// NOTE(review): ';' is not matched by any alternative, so a bare PEReference between
// declarations cannot be tokenized here - confirm whether PEReference should be listed too.
input := { S | Nmtoken | SystemLiteral | Comment | PI | CDSect | element | conditionalSect | SeparatorOrOperator_ }*
// Notes:
// (1) No input is discarded during lexical parsing, i.e. white space and comments are treated as tokens
// (2) Nmtoken also matches Name
// (3) SystemLiteral also matches EntityValue, AttValue, PubidLiteral,
//                                VersionLiteral_, StandaloneLiteral_ and EncodingLiteral_
// (4) We need to perform some syntax parsing to identify CharData, PIData_, CData and Ignore tokens

// (from Character Range)
// Any legal XML character: tab, line feed, carriage return and the three listed code point
// ranges. The gaps exclude 0xD800..0xDFFF (the surrogate blocks), 0xFFFE and 0xFFFF.
Char := '\t' | '\n' | '\r' | 0x20..0xD7FF | 0xE000..0xFFFD | 0x10000..0x10FFFF

// (from White Space)
// White space token: one or more space, tab, carriage return or line feed characters.
S ::= { ' ' | '\t' | '\r' | '\n' }+

// (from Names and Tokens)
// NameStartChar: characters allowed at the start of a Name.
// NameChar:      NameStartChar plus '-', '.', digits, 0xB7 and two further ranges.
// Name:          a NameStartChar followed by any number of NameChars (token).
// Names:         Names separated by exactly one space character (token).
// Nmtoken:       one or more NameChars, so every Name is also an Nmtoken (token).
// Nmtokens:      Nmtokens separated by exactly one space character (token).
NameStartChar  := ':' | 'A'..'Z' | '_' | 'a'..'z' | 0xC0..0xD6 | 0xD8..0xF6 | 0xF8..0x2FF | 0x370..0x37D
                  | 0x37F..0x1FFF | 0x200C..0x200D | 0x2070..0x218F | 0x2C00..0x2FEF
                  | 0x3001..0xD7FF | 0xF900..0xFDCF | 0xFDF0..0xFFFD | 0x10000..0xEFFFF
NameChar       := NameStartChar | '-' | '.' | '0'..'9' | 0xB7 | 0x0300..0x036F | 0x203F..0x2040
Name          ::= NameStartChar { NameChar }*
Names         ::= Name { ' ' Name }*
Nmtoken       ::= { NameChar }+
Nmtokens      ::= Nmtoken { ' ' Nmtoken }*

// (from Literals)
// Quoted literals. "! x Char" matches one Char provided it is not x, which is how the
// spec's negated character classes such as [^%&"] are expressed here.
// EntityValue:   quoted text that may contain PEReferences and References; '%', '&' and the
//                enclosing quote character cannot appear literally.
// AttValue:      quoted text that may contain References; '<', '&' and the enclosing quote
//                cannot appear literally. The quotes and the runs of plain data are separate
//                tokens (AttValueQuot_, AttValueQuotData_, AttValueApos_, AttValueAposData_).
// SystemLiteral: quoted text containing anything except the enclosing quote character.
// PubidLiteral:  quoted run of PubidChars; the single-quoted form excludes "'".
// PubidChar:     the characters permitted in a public identifier.
EntityValue       ::= '"' { ! '%' ! '&' ! '"' Char | PEReference | Reference }* '"'
                    | "'" { ! '%' ! '&' ! "'" Char | PEReference | Reference }* "'"
AttValue           := AttValueQuot_ { AttValueQuotData_ | Reference }* AttValueQuot_
                    | AttValueApos_ { AttValueAposData_ | Reference }* AttValueApos_
AttValueQuot_     ::= '"'
AttValueQuotData_ ::= { ! '<' ! '&' ! '"' Char }+
AttValueApos_     ::= "'"
AttValueAposData_ ::= { ! '<' ! '&' ! "'" Char }+
SystemLiteral     ::= '"' { ! '"' Char }* '"' | "'" { ! "'" Char }* "'"
PubidLiteral      ::= '"' { PubidChar }* '"' | "'" { ! "'" PubidChar }* "'"
PubidChar          := ' ' | '\r' | '\n' | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | "'" | '(' | ')' | '+'
                      | ',' | '.' | '/' | ':' | '=' | '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%'

// (from Character Data)
// Character data token: a (possibly empty) run of Chars that stops before '<', '&' or the
// section end "]]>".
CharData ::= { ! '<' ! '&' ! SectEnd_ Char }*

// (from Comments)
// Comment token: "<!--", then Chars up to the first "--", which must be part of the closing
// "-->". A "--" inside the comment body therefore makes the rule fail.
Comment ::= "<!--" { ! "--" Char }* "-->"

// (from Processing Instructions)
// PI:       "<?", a target, optionally white space followed by data, then "?>".
// PITarget: any Name except "xml" in any letter case. The inner "! NameChar" limits the
//           exclusion to the whole name, so names that merely begin with "xml" are accepted.
// PIData_:  a (possibly empty) run of Chars up to, but not including, the next "?>" (token).
PI        := PIStart_ PITarget [ S PIData_ ] PIEnd_
PITarget  := ! ( ( 'X' | 'x' ) ( 'M' | 'm' ) ( 'L' | 'l' ) ! NameChar ) Name
PIData_  ::= { ! PIEnd_ Char }*

// (from CDATA Sections)
// CDSect:  a CDATA section - start marker, data and end marker.
// CDStart: "<![" followed by the CDATA_ keyword and '['.
// CData:   a (possibly empty) run of Chars up to, but not including, the next "]]>" (token).
// CDEnd:   "]]>".
CDSect   := CDStart CData CDEnd
CDStart  := SectStart_ CDATA_ SectBegin_
CData   ::= { ! SectEnd_ Char }*
CDEnd    := SectEnd_

// (from Prolog)
// Eq:              '=' with optional white space on either side.
// VersionLiteral_: a version number in single or double quotes (token).
// VersionNum:      "1." followed by one or more decimal digits.
Eq               := [ S ] EqOperator_ [ S ]
VersionLiteral_ ::= "'" VersionNum "'" | '"' VersionNum '"'
VersionNum       := "1." { '0'..'9' }+

// (from Standalone Document Declaration)
// Standalone value token: "yes" or "no" in single or double quotes.
StandaloneLiteral_ ::= "'" ( "yes" | "no" ) "'" | '"' ( "yes" | "no" ) '"'

// (from Element)
// An element is either an empty-element tag or a start-tag, content and an end-tag.
// Both alternatives begin with TagStart_ Name and attributes; they differ only in the
// closing token (EmptyElemTagEnd_ versus TagEnd_).
element := EmptyElemTag | STag content ETag

// (from Start-tag)
// STag:      '<', a Name, any number of white-space-separated attributes, optional white
//            space, '>'.
// Attribute: a Name, Eq and an attribute value.
STag      := TagStart_ Name { S Attribute }* [ S ] TagEnd_
Attribute := Name Eq AttValue

// (from End-tag)
// ETag: "</", a Name, optional white space, '>'.
ETag := ETagStart_ Name [ S ] TagEnd_

// (from Content of Elements)
// content: optional character data, then any number of child items (element, reference,
// CDATA section, processing instruction or comment), each optionally followed by more
// character data. The rule can match nothing.
content := [ CharData ] { ( element | Reference | CDSect | PI | Comment ) [ CharData ] }*

// (from Tags for Empty Elements)
// EmptyElemTag: same shape as STag but closed with "/>".
EmptyElemTag := TagStart_ Name { S Attribute }* [ S ] EmptyElemTagEnd_

// (from Conditional Section)
// conditionalSect:    an include section or an ignore section.
// includeSect:        "<![" INCLUDE_ '[' followed by external subset declarations and "]]>".
// ignoreSect:         "<![" IGNORE_ '[' followed by ignored contents and "]]>".
// ignoreSectContents: ignored text with properly nested "<![" ... "]]>" pairs.
// Ignore:             a (possibly empty) run of Chars that stops before "<![" or "]]>" (token).
// NOTE(review): Ignore, and therefore ignoreSectContents, can match nothing, so the body of
// the "{ ignoreSectContents }*" loop in ignoreSect can succeed without consuming input -
// confirm that the pattern engine terminates such a loop.
conditionalSect     := includeSect | ignoreSect
includeSect         := SectStart_ [ S ] INCLUDE_ [ S ] SectBegin_ extSubsetDecl SectEnd_
ignoreSect          := SectStart_ [ S ] IGNORE_ [ S ] SectBegin_ { ignoreSectContents }* SectEnd_
ignoreSectContents  := Ignore { SectStart_ ignoreSectContents SectEnd_ Ignore }*
Ignore             ::= { ! SectStart_ ! SectEnd_ Char }*

// (from Character Reference)
// Character reference token: "&#" with one or more decimal digits, or "&#x" with one or
// more hexadecimal digits, terminated by ';'.
CharRef ::= "&#" { '0'..'9' }+ ';' | "&#x" { '0'..'9' | 'a'..'f' | 'A'..'F' }+ ';'

// (from Entity Reference)
// Reference:   an entity reference or a character reference.
// EntityRef:   '&' Name ';' (token).
// PEReference: '%' Name ';' (token).
Reference    := EntityRef | CharRef
EntityRef   ::= '&' Name ';'
PEReference ::= '%' Name ';'

// (from Encoding Declaration)
// EncodingLiteral_: an encoding name in double or single quotes (token).
// EncName:          an ASCII letter followed by any number of ASCII letters, digits,
//                   '.', '_' or '-'.
EncodingLiteral_ ::= '"' EncName '"' | "'" EncName "'"
EncName           := ( 'A'..'Z' | 'a'..'z' ) { 'A'..'Z' | 'a'..'z' | '0'..'9' | '.' | '_' | '-' }*

// Separators and Operators

// Punctuation token. The alternatives are laid out by length: three-character literals,
// then two-character literals, then single characters. Every multi-character literal comes
// before the single characters it starts with, presumably so that the longer form wins
// when alternatives are tried in order.
SeparatorOrOperator_ ::= "<![" | "]]>"
                         | "/>" | "<!" | "</" | "<?" | "?>"
                         | '#' | '%' | '(' | ')' | '*' | '+' | ',' | '<' | '=' | '>' | '?' | '[' | ']' | '|'

// Named single-literal tokens referenced by the rules above. Each literal also appears as
// an alternative of SeparatorOrOperator_.
PIStart_         ::= "<?"
PIEnd_           ::= "?>"
SectStart_       ::= "<!["
SectBegin_       ::= '['
SectEnd_         ::= "]]>"
EqOperator_      ::= '='
TagStart_        ::= '<'
TagEnd_          ::= '>'
ETagStart_       ::= "</"
EmptyElemTagEnd_ ::= "/>"

// Keywords

// Union of all keyword tokens: the upper-case keywords first, in alphabetical order, then
// the four lower-case names used by the XML and text declarations. No other rule visible
// in this file refers to Keyword_.
Keyword_ := ANY_
            | ATTLIST_
            | CDATA_
            | DOCTYPE_
            | ELEMENT_
            | EMPTY_
            | ENTITIES_
            | ENTITY_
            | FIXED_
            | ID_
            | IDREF_
            | IDREFS_
            | IGNORE_
            | IMPLIED_
            | INCLUDE_
            | NDATA_
            | NMTOKEN_
            | NMTOKENS_
            | NOTATION_
            | PCDATA_
            | PUBLIC_
            | REQUIRED_
            | SYSTEM_
            | encoding_
            | standalone_
            | version_
            | xml_

// Keyword tokens. Each one is the literal keyword text followed by "! NameChar", i.e. the
// keyword must not be immediately followed by a name character. This keeps a keyword from
// matching as the prefix of a longer name (for example ID_ against "IDREF").
// Matching is case-sensitive as written: the DTD keywords are upper case, the
// declaration names are lower case.
ANY_        ::= "ANY"        ! NameChar
ATTLIST_    ::= "ATTLIST"    ! NameChar
CDATA_      ::= "CDATA"      ! NameChar
DOCTYPE_    ::= "DOCTYPE"    ! NameChar
ELEMENT_    ::= "ELEMENT"    ! NameChar
EMPTY_      ::= "EMPTY"      ! NameChar
ENTITIES_   ::= "ENTITIES"   ! NameChar
ENTITY_     ::= "ENTITY"     ! NameChar
FIXED_      ::= "FIXED"      ! NameChar
ID_         ::= "ID"         ! NameChar
IDREF_      ::= "IDREF"      ! NameChar
IDREFS_     ::= "IDREFS"     ! NameChar
IGNORE_     ::= "IGNORE"     ! NameChar
IMPLIED_    ::= "IMPLIED"    ! NameChar
INCLUDE_    ::= "INCLUDE"    ! NameChar
NDATA_      ::= "NDATA"      ! NameChar
NMTOKEN_    ::= "NMTOKEN"    ! NameChar
NMTOKENS_   ::= "NMTOKENS"   ! NameChar
NOTATION_   ::= "NOTATION"   ! NameChar
PCDATA_     ::= "PCDATA"     ! NameChar
PUBLIC_     ::= "PUBLIC"     ! NameChar
REQUIRED_   ::= "REQUIRED"   ! NameChar
SYSTEM_     ::= "SYSTEM"     ! NameChar
encoding_   ::= "encoding"   ! NameChar
standalone_ ::= "standalone" ! NameChar
version_    ::= "version"    ! NameChar
xml_        ::= "xml"        ! NameChar

// End of File