1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441 |
- (*
- * Super Object Toolkit
- *
- * Usage allowed under the restrictions of the Lesser GNU General Public License
- * or alternatively the restrictions of the Mozilla Public License 1.1
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
- * the specific language governing rights and limitations under the License.
- *
- * Embarcadero Technologies Inc is not permitted to use or redistribute
- * this source code without explicit permission.
- *
- * Unit owner : Henri Gourvest <hgourvest@gmail.com>
- * Web site : http://www.progdigy.com
- *)
- unit superxmlparser;
- {$IFDEF FPC}
- {$MODE OBJFPC}{$H+}
- {$ENDIF}
- interface
- uses superobject, classes;
- type
- TOnProcessingInstruction = procedure(const PI, PIParent: ISuperObject);
- function XMLParseString(const data: SOString; pack: Boolean = false; onpi: TOnProcessingInstruction = nil): ISuperObject;
- function XMLParseStream(stream: TStream; pack: Boolean = false; onpi: TOnProcessingInstruction = nil): ISuperObject;
- function XMLParseFile(const FileName: string; pack: Boolean = false; onpi: TOnProcessingInstruction = nil): ISuperObject;
- {$IFDEF UNICODE}
- type
- TXMLWriteMethod = reference to procedure(const data: string);
- procedure XMLWrite(const node: ISuperObject; const method: TXMLWriteMethod);
- {$ENDIF}
- const
- xmlname = '#name';
- xmlattributes = '#attributes';
- xmlchildren = '#children';
- xmltext = '#text';
- dtdname = '#name';
- dtdPubidLiteral = '#pubidliteral';
- dtdSystemLiteral = '#systemliteral';
- implementation
- uses sysutils {$IFNDEF UNIX}, windows{$ENDIF};
- const
- XML_SPACE : PSOChar = #32;
- // XML_ARL: PSOChar = '[';
- XML_ARR: PSOChar = ']';
- XML_BIG: PSOChar = '>';
- XML_LOW: PSOChar = '<';
- XML_AMP: PSOChar = '&';
- XML_SQU: PSOChar = '''';
- XML_DQU: PSOChar = '"';
- type
- TSuperXMLState = (
- xsStart, // |
- xsEatSpaces, //
- xsElement, // <|
- xsElementName, // <[a..z]|
- xsAttributes, // <xml |
- xsAttributeName, // <xml a|
- xsEqual, // |= ...
- xsAttributeValue, // = |"...
- xsCloseEmptyElement, // <xml/|
- xsTryCloseElement, // <xml>..<|
- xsCloseElementName, // <xml>..</|
- xsChildren, // <xml>|
- xsElementString, // <xml> |azer
- xsElementComment, // <!|-- ...
- xsElementDocType, // <!D|
- xsElementDocTypeName, // <!DOCTYPE |...
- xsElementDocTypeExternId, // <!DOCTYPE xml |
- xsElementDocTypeExternIdPublic, // <!DOCTYPE xml P|
- xsElementDocTypeExternIdSystem, // <!DOCTYPE xml S|
- xsElementDocTypePubIdLiteral, // <!DOCTYPE xml SYSTEM |"
- xsElementDocTypeSystemLiteral, // <!DOCTYPE xml SYSTEM "" |""
- xsElementDocTypeTryIntSubset,
- xsElementDocTypeIntSubset,
- xsElementDocTypeTryClose,
- xsElementDocTypeEat, //
- xsCloseElementComment, // <!-- -|->
- xsElementPI, // <?|
- xsElementDataPI, // not an xml PI
- xsCloseElementPI, // <? ?|>
- xsElementCDATA, // <![|CDATA[
- xsClodeElementCDATA, // ]|]>
- xsEscape, // &|
- xsEscape_lt, // &l|t;
- xsEscape_gt, // &g|t;
- xsEscape_amp, // &a|mp;
- xsEscape_apos, // &a|pos;
- xsEscape_quot, // &q|uot;
- xsEscape_char, // &#|;
- xsEscape_char_num, // |123456;
- xsEscape_char_hex, // &#x|000FFff;
- xsEnd);
- TSuperXMLError = (xeSuccess, xeContinue, xeProcessInst, xeError);
- TSuperXMLElementClass = (xcNone, xcElement, xcComment, xcString, xcCdata, xcDocType, xcProcessInst);
- TSuperXMLEncoding = ({$IFNDEF UNIX}xnANSI,{$ENDIF} xnUTF8, xnUnicode);
- {$IFDEF UNICODE}
- procedure XMLWrite(const node: ISuperObject; const method: TXMLWriteMethod);
- var
- o: ISuperObject;
- ent: TSuperAvlEntry;
- str: string;
- begin
- str := '<' + node.S[xmlname];
- if ObjectIsType(node[xmlattributes], stObject) then
- for ent in node[xmlattributes].AsObject do
- str := str + ' ' + ent.Name + '="' + ent.Value.AsString + '"';
- if ObjectIsType(node[xmlchildren], stArray) then
- begin
- method(str + '>');
- for o in node[xmlchildren] do
- if ObjectIsType(o, stString) then
- method(o.AsString) else
- XMLWrite(o, method);
- method('</' + node.S[xmlname] + '>');
- end else
- method(str + '/>');
- end;
- {$ENDIF}
- type
- PSuperXMLStack = ^TSuperXMLStack;
- TSuperXMLStack = record
- state: TSuperXMLState;
- savedstate: TSuperXMLState;
- prev: PSuperXMLStack;
- next: PSuperXMLStack;
- clazz: TSuperXMLElementClass;
- obj: ISuperObject;
- end;
- TSuperXMLParser = class
- private
- FStack: PSuperXMLStack;
- FDocType: ISuperObject;
- FError: TSuperXMLError;
- FStr: TSuperWriterString;
- FValue: TSuperWriterString;
- FPosition: Integer;
- FAChar: SOChar;
- FPack: Boolean;
- procedure StackUp;
- procedure StackDown;
- procedure Reset;
- function ParseBuffer(data: PSOChar; var PI, PIParent: ISuperObject; len: Integer = -1): Integer;
- public
- constructor Create(pack: Boolean);
- destructor Destroy; override;
- end;
- { TXMLContext }
- constructor TSuperXMLParser.Create(pack: Boolean);
- begin
- FDocType := nil;
- FStr := TSuperWriterString.Create;
- FValue := TSuperWriterString.Create;
- StackUp;
- FError := xeSuccess;
- FPack := pack;
- end;
- destructor TSuperXMLParser.Destroy;
- begin
- while FStack <> nil do
- StackDown;
- FStr.Free;
- FValue.Free;
- end;
- procedure TSuperXMLParser.Reset;
- begin
- while FStack <> nil do
- StackDown;
- StackUp;
- FError := xeSuccess;
- end;
- function TSuperXMLParser.ParseBuffer(data: PSOChar; var PI, PIParent: ISuperObject; len: integer): Integer;
- const
- spaces = [#32,#9,#10,#13];
- alphas = ['a'..'z', 'A'..'Z', '_', ':', #161..#255];
- nums = ['0'..'9', '.', '-'];
- hex = nums + ['a'..'f','A'..'F'];
- alphanums = alphas + nums;
- publitteral = [#32, #13, #10, 'a'..'z', 'A'..'Z', '0'..'9', '-', '''', '"', '(', ')',
- '+', ',', '.', '/', ':', '=', '?', ';', '!', '*', '#', '@', '$', '_', '%'];
- function hexdigit(const x: SOChar): byte;
- begin
- if x <= '9' then
- Result := byte(x) - byte('0') else
- Result := (byte(x) and 7) + 9;
- end;
- procedure putchildrenstr;
- var
- anobject: ISuperObject;
- begin
- anobject := FStack^.obj.AsObject[xmlchildren];
- if anobject = nil then
- begin
- anobject := TSuperObject.Create(stArray);
- FStack^.obj.AsObject[xmlchildren] := anobject;
- end;
- anobject.AsArray.Add(TSuperObject.Create(FValue.Data));
- end;
- procedure AddProperty(const parent, value: ISuperObject; const name: SOString);
- var
- anobject: ISuperObject;
- arr: ISuperObject;
- begin
- anobject := parent.AsObject[name];
- if anobject = nil then
- parent.AsObject[name] := value else
- begin
- if (anobject.DataType = stArray) then
- anobject.AsArray.Add(value) else
- begin
- arr := TSuperObject.Create(stArray);
- arr.AsArray.Add(anobject);
- arr.AsArray.Add(value);
- parent.AsObject[name] := arr;
- end;
- end;
- end;
- procedure packend;
- var
- anobject, anobject2: ISuperObject;
- n: Integer;
- begin
- anobject := FStack^.obj.AsObject[xmlchildren];
- if (anobject <> nil) and (anobject.AsArray.Length = 1) and (anobject.AsArray[0].DataType = stString) then
- begin
- if FStack^.obj.AsObject.count = 2 then // name + children
- begin
- if FStack^.prev <> nil then
- AddProperty(FStack^.prev^.obj, anobject.AsArray[0], FStack^.obj.AsObject.S[xmlname]) else
- begin
- AddProperty(FStack^.obj, anobject.AsArray[0], xmltext);
- FStack^.obj.AsObject.Delete(xmlchildren);
- end;
- end
- else
- begin
- AddProperty(FStack^.obj, anobject.AsArray[0], FStack^.obj.AsObject.S[xmlname]);
- FStack^.obj.AsObject.Delete(xmlchildren);
- if FStack^.prev <> nil then
- AddProperty(FStack^.prev^.obj, FStack^.obj, FStack^.obj.AsObject.S[xmlname]) else
- FStack^.obj.AsObject.Delete(xmlchildren);
- FStack^.obj.AsObject.Delete(xmlname);
- end;
- end else
- begin
- if (anobject <> nil) then
- begin
- for n := 0 to anobject.AsArray.Length - 1 do
- begin
- anobject2 := anobject.AsArray[n];
- if ObjectIsType(anobject2, stObject) then
- begin
- AddProperty(FStack^.obj, anobject2, anobject2.AsObject.S[xmlname]);
- anobject2.AsObject.Delete(xmlname);
- end else
- AddProperty(FStack^.obj, anobject2, xmltext);
- end;
- FStack^.obj.Delete(xmlchildren);
- end;
- if (FStack^.prev <> nil) and (FStack^.obj.AsObject.count > 1) then
- begin
- if (FStack^.obj.AsObject.count = 2) and (FStack^.obj.AsObject[xmltext] <> nil) then
- AddProperty(FStack^.prev^.obj, FStack^.obj.AsObject[xmltext], FStack^.obj.AsObject.S[xmlname]) else
- AddProperty(FStack^.prev^.obj, FStack^.obj, FStack^.obj.AsObject.S[xmlname]);
- end;
- FStack^.obj.Delete(xmlname);
- end;
- end;
- var
- c: SOChar;
- read: Integer;
- p: PSOChar;
- anobject: ISuperObject;
- label
- redo, err;
- begin
- p := data;
- read := 0;
- //Result := 0;
- repeat
- if (read = len) then
- begin
- if (FStack^.prev = nil) and ((FStack^.state = xsEnd) or ((FStack^.state = xsEatSpaces) and (FStack^.savedstate = xsEnd))) then
- begin
- if FPack then
- packend;
- FError := xeSuccess;
- end else
- FError := xeContinue;
- Result := read;
- exit;
- end;
- c := p^;
- redo:
- case FStack^.state of
- xsEatSpaces:
- if {$IFDEF UNICODE}(c < #256) and {$ENDIF} (AnsiChar(c) in spaces) then {nop} else
- begin
- FStack^.state := FStack^.savedstate;
- goto redo;
- end;
- xsStart:
- case c of
- '<': FStack^.state := xsElement;
- else
- goto err;
- end;
- xsElement:
- begin
- case c of
- '?':
- begin
- FStack^.savedstate := xsStart;
- FStack^.state := xsEatSpaces;
- StackUp;
- FStr.Reset;
- FStack^.state := xsElementPI;
- FStack^.clazz := xcProcessInst;
- end;
- '!':
- begin
- FPosition := 0;
- FStack^.state := xsElementComment;
- FStack^.clazz := xcComment;
- end;
- else
- if ((c < #256) and (AnsiChar(c) in alphas)) or (c >= #256) then
- begin
- FStr.Reset;
- FStack^.state := xsElementName;
- FStack^.clazz := xcElement;
- goto redo;
- end else
- goto err;
- end;
- end;
- xsElementPI:
- begin
- if ((c < #256) and (AnsiChar(c) in alphanums)) or (c >= #256) then
- FStr.Append(@c, 1) else
- begin
- FStack^.obj := TSuperObject.Create(stObject);
- FStack^.obj.AsObject.S[xmlname] := FStr.Data;
- FStack^.state := xsEatSpaces;
- if FStr.Data = 'xml' then
- FStack^.savedstate := xsAttributes else
- begin
- FValue.Reset;
- FStack^.savedstate := xsElementDataPI;
- end;
- goto redo;
- end;
- end;
- xsElementDataPI:
- begin
- case c of
- '?':
- begin
- FStack^.obj.AsObject.S['data'] := FValue.Data;
- FStack^.state := xsCloseElementPI;
- end;
- else
- FValue.Append(@c, 1);
- end;
- end;
- xsCloseElementPI:
- begin
- if (c <> '>') then goto err;
- PI := FStack^.obj;
- StackDown;
- PIParent := FStack^.obj;
- FError := xeProcessInst;
- Result := read + 1;
- Exit;
- end;
- xsElementName:
- begin
- if ((c < #256) and (AnsiChar(c) in alphanums)) or (c >= #256) then
- FStr.Append(@c, 1) else
- begin
- FStack^.obj := TSuperObject.Create(stObject);
- FStack^.obj.AsObject.S[xmlname] := FStr.Data;
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsAttributes;
- goto redo;
- end;
- end;
- xsChildren:
- begin
- case c of
- '<': FStack^.state := xsTryCloseElement;
- else
- FValue.Reset;
- FStack^.state := xsElementString;
- FStack^.clazz := xcString;
- goto redo;
- end;
- end;
- xsCloseEmptyElement:
- begin
- case c of
- '>':
- begin
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsEnd;
- end
- else
- goto err;
- end;
- end;
- xsTryCloseElement:
- begin
- case c of
- '/': begin
- FStack^.state := xsCloseElementName;
- FPosition := 0;
- FStr.Reset;
- FStr.Append(PSoChar(FStack^.obj.AsObject.S[xmlname]));
- end;
- '!': begin
- FPosition := 0;
- FStack^.state := xsElementComment;
- FStack^.clazz := xcComment;
- end;
- '?': begin
- FStack^.savedstate := xsChildren;
- FStack^.state := xsEatSpaces;
- StackUp;
- FStr.Reset;
- FStack^.state := xsElementPI;
- FStack^.clazz := xcProcessInst;
- end
- else
- FStack^.state := xsChildren;
- StackUp;
- if ((c < #256) and (AnsiChar(c) in alphas)) or (c >= #256) then
- begin
- FStr.Reset;
- FStack^.state := xsElementName;
- FStack^.clazz := xcElement;
- goto redo;
- end else
- goto err;
- end;
- end;
- xsCloseElementName:
- begin
- if FStr.Position = FPosition then
- begin
- FStack^.savedstate := xsCloseEmptyElement;
- FStack^.state := xsEatSpaces;
- goto redo;
- end else
- begin
- if (c <> FStr.Data[FPosition]) then goto err;
- inc(FPosition);
- end;
- end;
- xsAttributes:
- begin
- case c of
- '?': begin
- if FStack^.clazz <> xcProcessInst then goto err;
- FStack^.state := xsCloseElementPI;
- end;
- '/': begin
- FStack^.state := xsCloseEmptyElement;
- end;
- '>': begin
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsChildren;
- end
- else
- if ((c < #256) and (AnsiChar(c) in alphas)) or (c >= #256) then
- begin
- FStr.Reset;
- FStr.Append(@c, 1);
- FStack^.state := xsAttributeName;
- end else
- goto err;
- end;
- end;
- xsAttributeName:
- begin
- if ((c < #256) and (AnsiChar(c) in alphanums)) or (c >= #256) then
- FStr.Append(@c, 1) else
- begin
- // no duplicate attribute
- if FPack then
- begin
- if FStack^.obj.AsObject[FStr.Data] <> nil then
- goto err;
- end else
- begin
- anobject := FStack^.obj.AsObject[xmlattributes];
- if (anobject <> nil) and (anobject.AsObject[FStr.Data] <> nil) then
- goto err;
- end;
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsEqual;
- goto redo;
- end;
- end;
- xsEqual:
- begin
- if c <> '=' then goto err;
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsAttributeValue;
- FValue.Reset;
- FPosition := 0;
- FAChar := #0;
- end;
- xsAttributeValue:
- begin
- if FAChar <> #0 then
- begin
- if (c = FAChar) then
- begin
- if FPack then
- begin
- FStack^.obj.AsObject[FStr.Data] := TSuperObject.Create(Fvalue.Data);
- end else
- begin
- anobject := FStack^.obj.AsObject[xmlattributes];
- if anobject = nil then
- begin
- anobject := TSuperObject.Create(stObject);
- FStack^.obj.AsObject[xmlattributes] := anobject;
- end;
- anobject.AsObject[FStr.Data] := TSuperObject.Create(Fvalue.Data);
- end;
- FStack^.savedstate := xsAttributes;
- FStack^.state := xsEatSpaces;
- end else
- case c of
- '&':
- begin
- FStack^.state := xsEscape;
- FStack^.savedstate := xsAttributeValue;
- end;
- #13, #10:
- begin
- FValue.TrimRight;
- FValue.Append(XML_SPACE, 1);
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsAttributeValue;
- end;
- else
- FValue.Append(@c, 1);
- end;
- end else
- begin
- if (c < #256) and (AnsiChar(c) in ['"', '''']) then
- begin
- FAChar := c;
- inc(FPosition);
- end else
- goto err;
- end;
- end;
- xsElementString:
- begin
- case c of
- '<': begin
- FValue.TrimRight;
- putchildrenstr;
- FStack^.state := xsTryCloseElement;
- end;
- #13, #10:
- begin
- FValue.TrimRight;
- FValue.Append(XML_SPACE, 1);
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsElementString;
- end;
- '&':
- begin
- FStack^.state := xsEscape;
- FStack^.savedstate := xsElementString;
- end
- else
- FValue.Append(@c, 1);
- end;
- end;
- xsElementComment:
- begin
- case FPosition of
- 0:
- begin
- case c of
- '-': Inc(FPosition);
- '[':
- begin
- FValue.Reset;
- FPosition := 0;
- FStack^.state := xsElementCDATA;
- FStack^.clazz := xcCdata;
- end;
- 'D':
- begin
- if (FStack^.prev = nil) and (FDocType = nil) then
- begin
- FStack^.state := xsElementDocType;
- FPosition := 0;
- FStack^.clazz := xcDocType;
- end else
- goto err;
- end;
- else
- goto err;
- end;
- end;
- 1:
- begin
- if c <> '-' then goto err;
- Inc(FPosition);
- end;
- else
- if c = '-' then
- begin
- FPosition := 0;
- FStack^.state := xsCloseElementComment;
- end;
- end;
- end;
- xsCloseElementComment:
- begin
- case FPosition of
- 0: begin
- if c <> '-' then
- begin
- FPosition := 2;
- FStack^.state := xsElementComment;
- end else
- Inc(FPosition);
- end;
- 1: begin
- if c <> '>' then goto err;
- FStack^.state := xsEatSpaces;
- if FStack^.obj <> nil then
- FStack^.savedstate := xsChildren else
- FStack^.savedstate := xsStart;
- end;
- end;
- end;
- xsElementCDATA:
- begin
- case FPosition of
- 0: if (c = 'C') then inc(FPosition) else goto err;
- 1: if (c = 'D') then inc(FPosition) else goto err;
- 2: if (c = 'A') then inc(FPosition) else goto err;
- 3: if (c = 'T') then inc(FPosition) else goto err;
- 4: if (c = 'A') then inc(FPosition) else goto err;
- 5: if (c = '[') then inc(FPosition) else goto err;
- else
- case c of
- ']': begin
- FPosition := 0;
- FStack^.state := xsClodeElementCDATA;
- end;
- else
- FValue.Append(@c, 1);
- end;
- end;
- end;
- xsClodeElementCDATA:
- begin
- case FPosition of
- 0: if (c = ']') then
- inc(FPosition) else
- begin
- FValue.Append(XML_ARR, 1);
- FValue.Append(@c, 1);
- FPosition := 6;
- FStack^.state := xsElementCDATA;
- end;
- 1: case c of
- '>':
- begin
- putchildrenstr;
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsChildren;
- end;
- ']':
- begin
- FValue.Append(@c, 1);
- end;
- else
- FValue.Append(@c, 1);
- FStack^.state := xsElementCDATA;
- end;
- end;
- end;
- xsElementDocType:
- begin
- case FPosition of
- 0: if (c = 'O') then inc(FPosition) else goto err;
- 1: if (c = 'C') then inc(FPosition) else goto err;
- 2: if (c = 'T') then inc(FPosition) else goto err;
- 3: if (c = 'Y') then inc(FPosition) else goto err;
- 4: if (c = 'P') then inc(FPosition) else goto err;
- 5: if (c = 'E') then inc(FPosition) else goto err;
- else
- if (c < #256) and (AnsiChar(c) in spaces) then
- begin
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsElementDocTypeName;
- FStr.Reset;
- end else
- goto err;
- end;
- end;
- xsElementDocTypeName:
- begin
- case FStr.Position of
- 0: begin
- case c of
- '>':
- begin
- FStack^.state := xsEatSpaces;
- FStack^.state := xsStart;
- FStack^.clazz := xcNone;
- end
- else
- if ((c < #256) and (AnsiChar(c) in alphas)) or (c > #256) then
- FStr.Append(@c, 1) else
- goto err;
- end;
- end;
- else
- if ((c < #256) and (AnsiChar(c) in alphanums)) or (c > #256) then
- FStr.Append(@c, 1) else
- if (c < #256) and (AnsiChar(c) in spaces) then
- begin
- FDocType := TSuperObject.Create(stObject);
- FDocType.AsObject.S[xmlname] := FStr.Data;
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsElementDocTypeExternId;
- end else
- goto err;
- end;
- end;
- xsElementDocTypeExternId:
- begin
- case c of
- 'P':
- begin
- FPosition := 0;
- FStack^.state := xsElementDocTypeExternIdPublic;
- end;
- 'S':
- begin
- FPosition := 0;
- FStack^.state := xsElementDocTypeExternIdSystem;
- end;
- '[':
- begin
- FStack^.savedstate := xsElementDocTypeIntSubset;
- FStack^.state := xsEatSpaces;
- end;
- '>':
- begin
- FStack^.savedstate := xsStart;
- FStack^.state := xsEatSpaces
- end
- else
- goto err;
- end;
- end;
- xsElementDocTypeExternIdPublic:
- begin
- case FPosition of
- 0: if (c = 'U') then inc(FPosition) else goto err;
- 1: if (c = 'B') then inc(FPosition) else goto err;
- 2: if (c = 'L') then inc(FPosition) else goto err;
- 3: if (c = 'I') then inc(FPosition) else goto err;
- 4: if (c = 'C') then inc(FPosition) else goto err;
- else
- if (c < #256) and (AnsiChar(c) in spaces) then
- begin
- FStr.Reset;
- FPosition := 0;
- FStack^.savedstate := xsElementDocTypePubIdLiteral;
- FStack^.state := xsEatSpaces;
- end else
- goto err;
- end;
- end;
- xsElementDocTypeExternIdSystem:
- begin
- case FPosition of
- 0: if (c = 'Y') then inc(FPosition) else goto err;
- 1: if (c = 'S') then inc(FPosition) else goto err;
- 2: if (c = 'T') then inc(FPosition) else goto err;
- 3: if (c = 'E') then inc(FPosition) else goto err;
- 4: if (c = 'M') then inc(FPosition) else goto err;
- else
- if (c < #256) and (AnsiChar(c) in spaces) then
- begin
- FStr.Reset;
- FPosition := 0;
- FStack^.savedstate := xsElementDocTypeSystemLiteral;
- FStack^.state := xsEatSpaces;
- end else
- goto err;
- end;
- end;
- xsElementDocTypePubIdLiteral:
- begin
- if FPosition = 0 then
- case c of
- '"', '''':
- begin
- FAChar := c;
- FPosition := 1;
- end
- else
- goto err;
- end else
- if c = FAChar then
- begin
- FDocType.AsObject.S[dtdPubidLiteral] := FStr.Data;
- FStr.Reset;
- FPosition := 0;
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsElementDocTypeSystemLiteral;
- end else
- if (c < #256) and (AnsiChar(c) in publitteral) then
- FStr.Append(@c, 1);
- end;
- xsElementDocTypeSystemLiteral:
- begin
- if FPosition = 0 then
- case c of
- '"', '''':
- begin
- FAChar := c;
- FPosition := 1;
- end
- else
- goto err;
- end else
- if c = FAChar then
- begin
- FDocType.AsObject.S[dtdSystemLiteral] := FStr.Data;
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsElementDocTypeTryIntSubset;
- end else
- FStr.Append(@c, 1);
- end;
- xsElementDocTypeTryIntSubset:
- begin
- case c of
- '>':
- begin
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsStart;
- FStack^.clazz := xcNone;
- end;
- '[':
- begin
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsElementDocTypeIntSubset;
- end;
- end;
- end;
- xsElementDocTypeIntSubset:
- begin
- case c of
- ']':
- begin
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsElementDocTypeTryClose;
- end;
- end;
- end;
- xsElementDocTypeTryClose:
- begin
- if c = '>' then
- begin
- FStack^.state := xsEatSpaces;
- FStack^.savedstate := xsStart;
- FStack^.clazz := xcNone;
- end else
- goto err;
- end;
- xsEscape:
- begin
- FPosition := 0;
- case c of
- 'l': FStack^.state := xsEscape_lt;
- 'g': FStack^.state := xsEscape_gt;
- 'a': FStack^.state := xsEscape_amp;
- 'q': FStack^.state := xsEscape_quot;
- '#': FStack^.state := xsEscape_char;
- else
- goto err;
- end;
- end;
- xsEscape_lt:
- begin
- case FPosition of
- 0: begin
- if c <> 't' then goto err;
- Inc(FPosition);
- end;
- 1: begin
- if c <> ';' then goto err;
- FValue.Append(XML_LOW, 1);
- FStack^.state := FStack^.savedstate;
- end;
- end;
- end;
- xsEscape_gt:
- begin
- case FPosition of
- 0: begin
- if c <> 't' then goto err;
- Inc(FPosition);
- end;
- 1: begin
- if c <> ';' then goto err;
- FValue.Append(XML_BIG, 1);
- FStack^.state := FStack^.savedstate;
- end;
- end;
- end;
- xsEscape_amp:
- begin
- case FPosition of
- 0: begin
- case c of
- 'm': Inc(FPosition);
- 'p': begin
- FStack^.state := xsEscape_apos;
- Inc(FPosition);
- end;
- else
- goto err;
- end;
- end;
- 1: begin
- if c <> 'p' then goto err;
- Inc(FPosition);
- end;
- 2: begin
- if c <> ';' then goto err;
- FValue.Append(XML_AMP, 1);
- FStack^.state := FStack^.savedstate;
- end;
- end;
- end;
- xsEscape_apos:
- begin
- case FPosition of
- 0: begin
- case c of
- 'p': Inc(FPosition);
- 'm': begin
- FStack^.state := xsEscape_amp;
- Inc(FPosition);
- end;
- else
- goto err;
- end;
- end;
- 1: begin
- if c <> 'o' then goto err;
- Inc(FPosition);
- end;
- 2: begin
- if c <> 's' then goto err;
- Inc(FPosition);
- end;
- 3: begin
- if c <> ';' then goto err;
- FValue.Append(XML_SQU, 1);
- FStack^.state := FStack^.savedstate;
- end;
- end;
- end;
- xsEscape_quot:
- begin
- case FPosition of
- 0: begin
- if c <> 'u' then goto err;
- Inc(FPosition);
- end;
- 1: begin
- if c <> 'o' then goto err;
- Inc(FPosition);
- end;
- 2: begin
- if c <> 't' then goto err;
- Inc(FPosition);
- end;
- 3: begin
- if c <> ';' then goto err;
- FValue.Append(XML_DQU, 1);
- FStack^.state := FStack^.savedstate;
- end;
- end;
- end;
- xsEscape_char:
- begin
- if (SOIChar(c) >= 256) then goto err;
- case AnsiChar(c) of
- '0'..'9':
- begin
- FPosition := SOIChar(c) - 48;
- FStack^.state := xsEscape_char_num;
- end;
- 'x':
- begin
- FStack^.state := xsEscape_char_hex;
- end
- else
- goto err;
- end;
- end;
- xsEscape_char_num:
- begin
- if (SOIChar(c) >= 256) then goto err;
- case AnsiChar(c) of
- '0'..'9':FPosition := (FPosition * 10) + (SOIChar(c) - 48);
- ';': begin
- FValue.Append(@FPosition, 1);
- FStack^.state := FStack^.savedstate;
- end;
- else
- goto err;
- end;
- end;
- xsEscape_char_hex:
- begin
- if (c >= #256) then goto err;
- if (AnsiChar(c) in hex) then
- begin
- FPosition := (FPosition * 16) + SOIChar(hexdigit(c));
- end else
- if c = ';' then
- begin
- FValue.Append(@FPosition, 1);
- FStack^.state := FStack^.savedstate;
- end else
- goto err;
- end;
- xsEnd:
- begin
- if(FStack^.prev = nil) then Break;
- if FStack^.obj <> nil then
- begin
- if FPack then
- packend else
- begin
- anobject := FStack^.prev^.obj.AsObject[xmlchildren];
- if anobject = nil then
- begin
- anobject := TSuperObject.Create(stArray);
- FStack^.prev^.obj.AsObject[xmlchildren] := anobject;
- end;
- anobject.AsArray.Add(FStack^.obj);
- end;
- end;
- StackDown;
- goto redo;
- end;
- end;
- inc(p);
- inc(read);
- until (c = #0);
- if FStack^.state = xsEnd then
- begin
- if FPack then
- packend;
- FError := xeSuccess;
- end else
- FError := xeError;
- Result := read;
- exit;
- err:
- FError := xeError;
- Result := read;
- end;
- function XMLParseFile(const FileName: string; pack: Boolean; onpi: TOnProcessingInstruction): ISuperObject;
- var
- stream: TFileStream;
- begin
- stream := TFileStream.Create(FileName, fmOpenRead, fmShareDenyWrite);
- try
- Result := XMLParseStream(stream, pack, onpi);
- finally
- stream.Free;
- end;
- end;
- procedure TSuperXMLParser.StackDown;
- var
- prev: PSuperXMLStack;
- begin
- if FStack <> nil then
- begin
- prev := FStack^.prev;
- FStack^.obj := nil;
- FreeMem(FStack);
- FStack := prev;
- if FStack <> nil then
- FStack^.next := nil;
- end;
- end;
- procedure TSuperXMLParser.StackUp;
- var
- st: PSuperXMLStack;
- begin
- {$IFDEF FPC}
- st := nil;
- {$ENDIF}
- GetMem(st, SizeOf(st^));
- FillChar(st^, SizeOf(st^), 0);
- st^.state := xsEatSpaces;
- st^.savedstate := xsStart;
- st^.prev := FStack;
- if st^.prev <> nil then
- st^.prev^.next := st;
- st^.next := nil;
- st^.obj := nil;
- FStack := st;
- end;
- function utf8toucs2(src: PAnsiChar; srclen: Integer; dst: PWideChar; unused: PInteger): Integer;
- var
- ch: Byte;
- ret: Word;
- min: Cardinal;
- rem, com: integer;
- label
- redo;
- begin
- Result := 0;
- ret := 0;
- rem := 0;
- min := 0;
- if unused <> nil then
- unused^ := 0;
- if(src = nil) or (srclen = 0) then
- begin
- dst^ := #0;
- Exit;
- end;
- while srclen > 0 do
- begin
- ch := Byte(src^);
- inc(src);
- dec(srclen);
- redo:
- if (ch and $80) = 0 then
- begin
- dst^ := WideChar(ch);
- inc(Result);
- end else
- begin
- if((ch and $E0) = $C0) then
- begin
- min := $80;
- rem := 1;
- ret := ch and $1F;
- end else
- if((ch and $F0) = $E0) then
- begin
- min := $800;
- rem := 2;
- ret := ch and $0F;
- end else
- // too large utf8 bloc
- // ignore and continue
- continue;
- com := rem;
- while(rem <> 0) do
- begin
- dec(rem);
- if(srclen = 0) then
- begin
- if unused <> nil then
- unused^ := com;
- Exit;
- end;
- ch := Byte(src^);
- inc(src);
- dec(srclen);
- if((ch and $C0) = $80) then
- begin
- ret := ret shl 6;
- ret := ret or (ch and $3F);
- end else
- begin
- // unterminated utf8 bloc :/
- // try next one
- goto redo;
- end;
- end;
- if (ret >= min) then
- begin
- dst^ := WideChar(ret);
- inc(Result);
- end else
- // too small utf8 bloc
- // ignore and continue
- Continue;
- end;
- inc(dst);
- end;
- end;
- function XMLParseStream(stream: TStream; pack: Boolean; onpi: TOnProcessingInstruction): ISuperObject;
- const
- CP_UTF8 = 65001;
- var
- wbuffer: array[0..1023] of SOChar;
- abuffer: array[0..1023] of AnsiChar;
- len, read, cursor: Integer;
- PI, PIParent: ISuperObject;
- bom: array[0..2] of byte;
- encoding: TSuperXMLEncoding;
- encodingstr: string;
- cp: Integer;
- ecp: ISuperObject;
- function getbuffer: Integer;
- var
- size, unusued: Integer;
- begin
- case encoding of
- {$IFNDEF UNIX}
- xnANSI:
- begin
- size := stream.Read(abuffer, sizeof(abuffer));
- result := MultiByteToWideChar(cp, 0, @abuffer, size, @wbuffer, sizeof(wbuffer));
- end;
- {$ENDIF}
- xnUTF8:
- begin
- size := stream.Read(abuffer, sizeof(abuffer));
- result := utf8toucs2(@abuffer, size, @wbuffer, @unusued);
- if unusued > 0 then
- stream.Seek(-unusued, soFromCurrent);
- end;
- xnUnicode: Result := stream.Read(wbuffer, sizeof(wbuffer)) div sizeof(SOChar);
- else
- Result := 0;
- end;
- end;
- label
- redo, retry;
- begin
- // init knowned code pages
- ecp := so('{iso-8859-1: 28591,'+
- 'iso-8859-2: 28592,'+
- 'iso-8859-3: 28593,'+
- 'iso-8859-4: 28594,'+
- 'iso-8859-5: 28595,'+
- 'iso-8859-6: 28596,'+
- 'iso-8859-7: 28597,'+
- 'iso-8859-8: 28598,'+
- 'iso-8859-9: 28599,'+
- 'iso 8859-15: 28605,'+
- 'iso-2022-jp: 50220,'+
- 'shift_jis: 932,'+
- 'euc-jp: 20932,'+
- 'ascii: 20127,'+
- 'windows-1251: 1251,'+
- 'windows-1252: 1252}');
- // detect bom
- stream.Seek(0, soFromBeginning);
- len := stream.Read(bom, sizeof(bom));
- if (len >= 2) and (bom[0] = $FF) and (bom[1] = $FE) then
- begin
- encoding := xnUnicode;
- stream.Seek(2, soFromBeginning);
- end else
- if (len = 3) and (bom[0] = $EF) and (bom[1] = $BB) and (bom[2] = $BF) then
- begin
- encoding := xnUTF8;
- cp := CP_UTF8;
- end else
- begin
- encoding := xnUTF8;
- cp := 0;
- stream.Seek(0, soFromBeginning);
- end;
- with TSuperXMLParser.Create(pack) do
- try
- len := getbuffer;
- while len > 0 do
- begin
- retry:
- read := ParseBuffer(@wbuffer, PI, PIParent, len);
- cursor := 0;
- redo:
- case FError of
- xeContinue: len := getbuffer;
- xeSuccess, xeError: Break;
- xeProcessInst:
- begin
- if (PIParent = nil) and (PI.AsObject.S[xmlname] = 'xml') then
- begin
- if pack then
- encodingstr := LowerCase(trim(PI.S['encoding'])) else
- encodingstr := LowerCase(trim(PI.S[xmlattributes + '.encoding']));
- if (encodingstr <> '') then
- case encoding of
- xnUTF8: if(cp = CP_UTF8) then
- begin
- if (encodingstr <> 'utf-8') then
- begin
- FError := xeError;
- Break;
- end;
- end else
- begin
- cp := ecp.I[encodingstr];
- if cp > 0 then
- begin
- {$IFNDEF UNIX}
- encoding := xnANSI;
- Reset;
- stream.Seek(0, soFromBeginning);
- len := getbuffer;
- goto retry;
- {$ELSE}
- raise Exception.Create('charset not implemented');
- {$ENDIF}
- end;
- end;
- xnUnicode:
- if (encodingstr <> 'utf-16') and (encodingstr <> 'unicode') then
- begin
- FError := xeError;
- Break;
- end;
- end;
- end else
- if Assigned(onpi) then
- onpi(PI, PIParent);
- inc(cursor, read);
- if cursor >= len then
- begin
- len := getbuffer;
- continue;
- end;
- read := ParseBuffer(@wbuffer[cursor], PI, PIParent, len - cursor);
- goto redo;
- end;
- end;
- end;
- if FError = xeSuccess then
- Result := FStack^.obj else
- Result := nil;
- finally
- Free;
- end;
- end;
- function XMLParseString(const data: SOString; pack: Boolean; onpi: TOnProcessingInstruction): ISuperObject;
- var
- PI, PIParent: ISuperObject;
- cursor, read: Integer;
- label
- redo;
- begin
- with TSuperXMLParser.Create(pack) do
- try
- cursor := 0;
- read := ParseBuffer(PSOChar(data), PI, PIParent);
- redo:
- case FError of
- xeSuccess: Result := FStack^.obj;
- xeError: Result := nil;
- xeProcessInst:
- begin
- if Assigned(onpi) then
- onpi(PI, PIParent);
- inc(cursor, read);
- read := ParseBuffer(@data[cursor+1], PI, PIParent);
- goto redo;
- end;
- end;
- finally
- Free;
- end;
- end;
- end.
|