00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "CeylanXMLParser.h"
00028
00029 #include "CeylanTree.h"
00030 #include "CeylanXML.h"
00031 #include "CeylanXMLElement.h"
00032 #include "CeylanFile.h"
00033 #include "CeylanXMLVisitor.h"
00034 #include "CeylanLogPlug.h"
00035 #include "CeylanOperators.h"
00036 #include "CeylanHolder.h"
00037
00038
00039 #ifdef CEYLAN_USES_CONFIG_H
00040 #include "CeylanConfig.h"
00041 #endif // CEYLAN_USES_CONFIG_H
00042
00043
00044 #include <stack>
00045
00046
00047 using namespace Ceylan ;
00048 using namespace Ceylan::Log ;
00049 using namespace Ceylan::System ;
00050 using namespace Ceylan::XML ;
00051
00052 using std::string ;
00053 using std::stack ;
00054
00055
00056
00057
00058
/*
 * DISPLAY_DEBUG_XML sends its message to the debug log channel only when
 * CEYLAN_DEBUG_XML is set. Otherwise it expands to nothing, so the message
 * expression is not even evaluated.
 */
#if ! CEYLAN_DEBUG_XML

#define DISPLAY_DEBUG_XML(message)

#else // ! CEYLAN_DEBUG_XML

#define DISPLAY_DEBUG_XML(message) LogPlug::debug(message)

#endif // ! CEYLAN_DEBUG_XML
00068
00069
00070
00071
00072 XMLParserException::XMLParserException( const std::string & reason ) :
00073 XMLException( reason )
00074 {
00075
00076 }
00077
00078
00079
00080 XMLParserException::~XMLParserException() throw()
00081 {
00082
00083 }
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094 std::string XMLParser::DefaultEncoding = Latin1WithEuroEncoding ;
00095
00096
00097
00098 XMLParser::XMLParser( const std::string & filename ) :
00099 _filename( filename ),
00100 _parsedTree( 0 ),
00101 _encoding( DefaultEncoding )
00102 {
00103
00104 }
00105
00106
00107
00108 XMLParser::~XMLParser() throw()
00109 {
00110
00111 if ( _parsedTree != 0 )
00112 delete _parsedTree ;
00113
00114 }
00115
00116
00117
00118 bool XMLParser::hasXMLTree() const
00119 {
00120
00121 return ( _parsedTree != 0 ) ;
00122
00123 }
00124
00125
00126
00127 XMLParser::XMLTree & XMLParser::getXMLTree() const
00128 {
00129
00130 if ( _parsedTree == 0 )
00131 throw XMLParserException( "XMLParser::getXMLTree: no available tree" ) ;
00132
00133 return *_parsedTree ;
00134
00135 }
00136
00137
00138
00139 void XMLParser::setXMLTree( XMLTree & newTree )
00140 {
00141
00142 if ( _parsedTree != 0 )
00143 delete _parsedTree ;
00144
00145 _parsedTree = &newTree ;
00146
00147 }
00148
00149
00150
00151 void XMLParser::saveToFile( const std::string & filename ) const
00152 {
00153
00154 if ( _parsedTree == 0 )
00155 throw XMLParserException( "XMLParser::saveToFile: "
00156 "no parsed tree to serialize." ) ;
00157
00158 string actualFileName ;
00159
00160 if ( filename.empty() )
00161 actualFileName = _filename ;
00162 else
00163 actualFileName = filename ;
00164
00165 try
00166 {
00167
00168 File & xmlFile = File::Create( actualFileName ) ;
00169
00170
00171 string header = "<?xml version=\"1.0\" encoding=\""
00172 + _encoding + "\"?>\n" ;
00173
00174 xmlFile.write( header ) ;
00175
00176
00177 XMLSavingVisitor myVisitor( xmlFile ) ;
00178
00179 _parsedTree->accept( myVisitor ) ;
00180
00181 xmlFile.close() ;
00182
00183 }
00184 catch( const SystemException & e )
00185 {
00186
00187 throw XMLParserException( "XMLParser::saveToFile failed: "
00188 + e.toString() ) ;
00189
00190 }
00191
00192
00193 }
00194
00195
00196
00197 void XMLParser::loadFromFile()
00198 {
00199
00200 try
00201 {
00202
00203 if ( ! File::ExistsAsFileOrSymbolicLink( _filename ) )
00204 throw XMLParserException( "XMLParser::loadFromFile failed: "
00205 "file '" + _filename + "' does not exist." ) ;
00206
00207 if ( _parsedTree != 0 )
00208 {
00209
00210 delete _parsedTree ;
00211 _parsedTree = 0 ;
00212
00213 }
00214
00215
00216 Holder<File> inputFileHolder( File::Open( _filename ) ) ;
00217
00218 Ceylan::Uint8 readChar ;
00219 inputFileHolder->skipWhitespaces( readChar ) ;
00220
00221 if ( readChar != LowerThan )
00222 throw XMLParserException( "XMLParser::loadFromFile failed: "
00223 "first non-space character is not '"
00224 + Ceylan::toString( LowerThan ) + "', read '"
00225 + Ceylan::toString( readChar ) + "' instead." ) ;
00226
00227
00228
00229 LowerThanSequence seq = InterpretLowerThanSequence(
00230 inputFileHolder.get(), readChar ) ;
00231
00232 if ( seq != XMLParser::Declaration )
00233 throw XMLParserException( "XMLParser::loadFromFile failed: "
00234 "expected to read an XML declaration, but read a "
00235 + DescribeLowerThanSequence( seq ) ) ;
00236
00237
00238 InterpretXMLDeclaration( inputFileHolder.get() ) ;
00239
00240 Ceylan::Uint8 remaining ;
00241
00242 inputFileHolder->skipWhitespaces( remaining ) ;
00243
00244
00245
00246 if ( remaining != XML::LowerThan )
00247 throw XMLParserException( "XMLParser::loadFromFile failed: "
00248 "after declaration, first non-whitespace character "
00249 "is not '<'." ) ;
00250
00251
00252 stack<string> markupStack ;
00253
00254
00255 handleNextElement( inputFileHolder.get(), markupStack, _parsedTree,
00256 remaining ) ;
00257
00258
00259
00260 }
00261 catch( const System::SystemException & e )
00262 {
00263
00264 throw XMLParserException( "XMLParser::loadFromFile failed: "
00265 + e.toString() ) ;
00266
00267 }
00268
00269 }
00270
00271
00272
00273 XMLParser::LowerThanSequence XMLParser::InterpretLowerThanSequence(
00274 InputStream & input, Ceylan::Uint8 & readChar )
00275 {
00276
00277 readChar = input.readUint8() ;
00278
00279 switch( readChar )
00280 {
00281
00282 case XML::QuestionMark:
00283 return XMLParser::Declaration ;
00284 break ;
00285
00286 case XML::ExclamationMark:
00287 return XMLParser::Comment ;
00288 break ;
00289
00290 case XML::Slash:
00291 return XMLParser::ClosingMarkup ;
00292 break ;
00293
00294 default:
00295
00296 break ;
00297 }
00298
00299
00300 if ( Ceylan::isLetter( readChar ) )
00301 return XMLParser::OpeningMarkup ;
00302
00303 return XMLParser::UnexpectedElement ;
00304
00305 }
00306
00307
00308
00309 std::string XMLParser::DescribeLowerThanSequence( LowerThanSequence sequence )
00310 {
00311
00312 switch( sequence )
00313 {
00314
00315 case XMLParser::Declaration:
00316 return "XML declaration" ;
00317 break ;
00318
00319 case XMLParser::Comment:
00320 return "XML comment" ;
00321 break ;
00322
00323 case XMLParser::OpeningMarkup:
00324 return "XML opening markup" ;
00325 break ;
00326
00327 case XMLParser::ClosingMarkup:
00328 return "XML closing markup" ;
00329 break ;
00330
00331 case XMLParser::UnexpectedElement:
00332 return "unexpected XML element" ;
00333 break ;
00334
00335 default:
00336 return "unknown sequence type (abnormal)" ;
00337 break ;
00338
00339 }
00340
00341
00342 }
00343
00344
00345
00346 void XMLParser::InterpretXMLDeclaration( InputStream & input )
00347 {
00348
00349
00350
00351 string res ;
00352
00353 Ceylan::Uint8 readChar ;
00354
00355
00356 while ( ( readChar = input.readUint8() ) != XML::QuestionMark )
00357 res += readChar ;
00358
00359
00360 readChar = input.readUint8() ;
00361
00362 if ( readChar != XML::HigherThan )
00363 throw XMLParserException( "XMLParser::InterpretXMLDeclaration: "
00364 "expected to finish XML declaration with '?>', read '"
00365 + res + Ceylan::toString( XML::QuestionMark )
00366 + Ceylan::toString( readChar ) + "'." ) ;
00367
00368
00369
00370 if ( res.find( "xml", 0 ) != 0 )
00371 throw XMLParserException( "XMLParser::InterpretXMLDeclaration: "
00372 "expected to find 'xml', read '" + res + "'." ) ;
00373
00374
00375 res = res.substr( 3 ) ;
00376
00377
00378
00379
00380
00381
00382
00383
00384 AttributeMap declarationMap ;
00385
00386 ParseAttributeSequence( res, declarationMap ) ;
00387
00388 DISPLAY_DEBUG_XML( "XMLParser::InterpretXMLDeclaration parsed: "
00389 + Ceylan::formatStringMap( declarationMap ) ) ;
00390
00391 AttributeMap::const_iterator it = declarationMap.find( "version" ) ;
00392
00393 if ( it == declarationMap.end() )
00394 throw XMLParserException( "XMLParser::InterpretXMLDeclaration: "
00395 "no XML 'version' attribute found." ) ;
00396
00397 if ( (*it).second != "1.0" )
00398 throw XMLParserException( "XMLParser::InterpretXMLDeclaration: "
00399 "only the 1.0 version of XML is supported, whereas the "
00400 + (*it).second + " version was specified." ) ;
00401
00402 it = declarationMap.find( "encoding" ) ;
00403 if ( it == declarationMap.end() )
00404 LogPlug::warning( "XMLParser::InterpretXMLDeclaration: "
00405 "no XML encoding attribute found in declaration, "
00406 "falling back to default one, "
00407 + XML::Latin1WithEuroEncoding + "." ) ;
00408
00409 if ( (*it).second != XML::Latin1WithEuroEncoding )
00410 throw XMLParserException(
00411 "XMLParser::InterpretXMLDeclaration: only the "
00412 + XML::Latin1WithEuroEncoding
00413 +" encoding is supported, whereas the "
00414 + (*it).second + " encoding was specified." ) ;
00415
00416 }
00417
00418
00419
00420 void XMLParser::ParseAttributeSequence( const string & toBeParsed,
00421 AttributeMap & attributeMap )
00422 {
00423
00424 DISPLAY_DEBUG_XML( "XMLParser::ParseAttributeSequence: will parse '"
00425 + toBeParsed + "'." ) ;
00426
00427 StringSize size = toBeParsed.size() ;
00428 StringSize index = 0 ;
00429
00430 string attributeName ;
00431 string attributeValue ;
00432
00433 while ( index < size )
00434 {
00435
00436
00437 while ( index < size && Ceylan::isWhitespace( toBeParsed[index] ) )
00438 index++ ;
00439
00440 if ( index >= size )
00441 return ;
00442
00443
00444 if ( ! Ceylan::isLetter( toBeParsed[index] ) )
00445 throw XMLParserException( "XMLParser::ParseAttributeSequence: "
00446 "expecting first character of attribute name, read '"
00447 + Ceylan::toString( toBeParsed[index] ) + "' instead." ) ;
00448
00449 attributeName = toBeParsed[index] ;
00450 index++ ;
00451
00452
00453
00454
00455
00456
00457
00458
00459 while ( index < size && ( ! Ceylan::isWhitespace( toBeParsed[index] ) )
00460 && toBeParsed[index] != XML::Equal )
00461 {
00462 attributeName += toBeParsed[index] ;
00463 index++ ;
00464 }
00465
00466 DISPLAY_DEBUG_XML( "XMLParser::ParseAttributeSequence: "
00467 "adding attribute name '" + attributeName + "'." ) ;
00468
00469 attributeMap.insert( make_pair( attributeName, "" ) ) ;
00470
00471
00472
00473 if ( index >= size )
00474 return ;
00475
00476 while ( index < size && Ceylan::isWhitespace( toBeParsed[index] ) )
00477 index++ ;
00478
00479 if ( index >= size )
00480 return ;
00481
00482 if ( toBeParsed[index] == XML::Equal )
00483 {
00484
00485 index++ ;
00486
00487 while ( index < size && Ceylan::isWhitespace( toBeParsed[index] ) )
00488 index++ ;
00489
00490 if ( index >= size )
00491 return ;
00492
00493
00494 if ( toBeParsed[index] != XML::DoubleQuote )
00495 throw XMLParserException( "XMLParser::ParseAttributeSequence: "
00496 "expecting double quotes to begin attribute value, read '"
00497 + Ceylan::toString( toBeParsed[index] ) + "' instead." ) ;
00498
00499 index++ ;
00500
00501 if ( index >= size )
00502 return ;
00503
00504 while ( index < size && toBeParsed[index] != XML::DoubleQuote )
00505 {
00506 attributeValue += toBeParsed[index] ;
00507 index++ ;
00508 }
00509
00510 index++ ;
00511
00512 DISPLAY_DEBUG_XML( "XMLParser::ParseAttributeSequence: "
00513 "associating to attribute name '" + attributeName
00514 + "' the following attribute value: '" + attributeValue
00515 + "'." ) ;
00516
00517 attributeMap[ attributeName ] = attributeValue ;
00518
00519
00520 if ( index >= size )
00521 return ;
00522
00523 }
00524
00525 attributeName.clear() ;
00526 attributeValue.clear() ;
00527
00528 }
00529
00530 }
00531
00532
00533
00534 const string XMLParser::toString( Ceylan::VerbosityLevels level ) const
00535 {
00536
00537 string res = "XML parser " ;
00538
00539 if ( _parsedTree != 0 )
00540 res += "with following tree in memory: "
00541 + _parsedTree->toString( level ) ;
00542 else
00543 res += "with no tree in memory" ;
00544
00545 if ( level == Ceylan::low )
00546 return res ;
00547
00548 if ( _filename.empty() )
00549 res += ". No serialization file defined" ;
00550 else
00551 res += ". Serialization file is '" + _filename + "'" ;
00552
00553 return res ;
00554
00555 }
00556
00557
00558
00559 void XMLParser::handleNextElement( System::InputStream & input,
00560 stack<string> & markupStack, XMLTree * currentTree,
00561 Ceylan::Uint8 & remaining )
00562 {
00563
00564
00565
00566
00567
00568
00569
00570
00571 if ( remaining != XML::LowerThan )
00572 {
00573
00574
00575 string text ;
00576 text += static_cast<char>( remaining ) ;
00577
00578
00579 while ( ( remaining = input.readUint8() ) != XML::LowerThan )
00580 text += remaining ;
00581
00582
00583
00584
00585
00586
00587
00588 StringSize index = text.size() - 1 ;
00589
00590 while ( index > 0 && Ceylan::isWhitespace( text[index] ) )
00591 index-- ;
00592
00593 if ( index < text.size() - 1 )
00594 text = text.substr( 0, index + 1 ) ;
00595
00596 DISPLAY_DEBUG_XML( "XMLParser::handleNextElement: "
00597 "creating XML text element from '" + text + "'." ) ;
00598
00599 XMLText * newText = new XMLText( text ) ;
00600
00601 XMLParser::XMLTree * newNode = new XMLParser::XMLTree( *newText ) ;
00602
00603 if ( currentTree == 0 )
00604 throw XMLParserException( "XMLParser::handleNextElement: "
00605 "text '" + text + "' must be enclosed in markups." ) ;
00606
00607 currentTree->addSon( *newNode ) ;
00608
00609 handleNextElement( input, markupStack, currentTree, remaining ) ;
00610
00611 return ;
00612
00613 }
00614
00615
00616
00617
00618 LowerThanSequence seq = InterpretLowerThanSequence( input, remaining ) ;
00619
00620 if ( seq != OpeningMarkup && seq != ClosingMarkup )
00621 throw XMLParserException( "XMLParser::handleNextElement: "
00622 "expecting opening or closing markup, found "
00623 + DescribeLowerThanSequence( seq ) + "." ) ;
00624
00625
00626 string markupName ;
00627
00628
00629 if ( seq == OpeningMarkup )
00630 markupName = remaining ;
00631
00632 remaining = input.readUint8() ;
00633
00634 while ( ! Ceylan::isWhitespace( remaining )
00635 && remaining != XML::HigherThan )
00636 {
00637 markupName += remaining ;
00638 remaining = input.readUint8() ;
00639 }
00640
00641 DISPLAY_DEBUG_XML( "XMLParser::handleNextElement: read markup '"
00642 + markupName + "'." ) ;
00643
00644 if ( seq == OpeningMarkup )
00645 {
00646
00647 XMLMarkup * newMarkup = new XMLMarkup( markupName ) ;
00648 markupStack.push( markupName ) ;
00649
00650
00651
00652
00653
00654
00655 bool soloTagToClose = false ;
00656
00657
00658 if ( Ceylan::isWhitespace( remaining ) )
00659 input.skipWhitespaces( remaining ) ;
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669 if ( remaining == XML::Slash )
00670 {
00671
00672
00673 remaining = input.readUint8() ;
00674
00675 if ( remaining != HigherThan )
00676 throw XMLParserException( "XMLParser::handleNextElement: "
00677 "expecting ending '/>' for solo tag '" + markupName
00678 + "', found '/" + Ceylan::toString( remaining ) + "'." ) ;
00679
00680 soloTagToClose = true ;
00681
00682 }
00683 else if ( remaining != XML::HigherThan )
00684 {
00685
00686
00687 string restOfMarkup ;
00688
00689 do
00690 {
00691
00692 restOfMarkup += remaining ;
00693
00694 }
00695 while ( ( remaining = input.readUint8() ) != XML::HigherThan ) ;
00696
00697 ParseAttributeSequence( restOfMarkup, newMarkup->getAttributes() ) ;
00698
00699 DISPLAY_DEBUG_XML( "XMLParser::handleNextElement: "
00700 "read markup is now '"
00701 + encodeToHTML( newMarkup->toString() ) + "'." ) ;
00702
00703 }
00704
00705
00706
00707 XMLParser::XMLTree * newNode = new XMLParser::XMLTree( *newMarkup ) ;
00708
00709
00710 if ( currentTree != 0 )
00711 {
00712 DISPLAY_DEBUG_XML( "XMLParser::handleNextElement: "
00713 "adding son to current parsed tree." ) ;
00714 currentTree->addSon( *newNode ) ;
00715 currentTree = newNode ;
00716 }
00717 else
00718 {
00719 DISPLAY_DEBUG_XML( "XMLParser::handleNextElement: "
00720 "creating a new parsed tree." ) ;
00721 setXMLTree( *newNode ) ;
00722 currentTree = _parsedTree ;
00723 }
00724
00725 if ( soloTagToClose )
00726 {
00727
00728
00729
00730
00731
00732
00733
00734
00735 DISPLAY_DEBUG_XML( "XMLParser::handleNextElement: "
00736 "closing solo-tag based markup '" + markupName
00737 + "' as expected." ) ;
00738
00739 markupStack.pop() ;
00740
00741
00742 if ( markupStack.empty() )
00743 return ;
00744
00745
00746 XMLTree * tempTree = _parsedTree->getFather( *currentTree ) ;
00747
00748 if ( tempTree == 0 )
00749 throw XMLParserException( "XMLParser::handleNextElement: "
00750 "no father found for current tree '"
00751 + currentTree->toString()
00752 + "' in parsed tree: '" + _parsedTree->toString() + "'." ) ;
00753
00754 currentTree = tempTree ;
00755
00756
00757 }
00758
00759 }
00760 else
00761 {
00762
00763
00764 string toBeClosed = markupStack.top() ;
00765
00766 if ( toBeClosed != markupName )
00767 throw XMLParserException( "XMLParser::handleNextElement: "
00768 "expecting closing markup for '" + toBeClosed
00769 + "', found closing markup for '" + markupName + "'." ) ;
00770
00771 DISPLAY_DEBUG_XML( "XMLParser::handleNextElement: closing markup '"
00772 + markupName + "' as expected." ) ;
00773
00774 markupStack.pop() ;
00775
00776
00777 if ( markupStack.empty() )
00778 return ;
00779
00780
00781 XMLTree * tempTree = _parsedTree->getFather( *currentTree ) ;
00782
00783 if ( tempTree == 0 )
00784 throw XMLParserException( "XMLParser::handleNextElement: "
00785 "no father found for current tree '" + currentTree->toString()
00786 + "' in parsed tree: '" + _parsedTree->toString() + "'." ) ;
00787
00788 currentTree = tempTree ;
00789
00790
00791 if ( remaining != XML::HigherThan )
00792 while ( input.readUint8() != XML::HigherThan )
00793 ;
00794
00795 }
00796
00797
00798 input.skipWhitespaces( remaining ) ;
00799
00800 handleNextElement( input, markupStack, currentTree, remaining ) ;
00801
00802 }
00803