xmlNode.cpp

Go to the documentation of this file.
00001 
00013 #include "xmlNode.h"
00014 //#include "debug.h"
00015 
00016 #include <assert.h>
00017 
00018 #include <istream>
00019 #include <stack>
00020 
00021 namespace sxml
00022 {
00023         
00024 using namespace std;
00025 
/**
 * States of the finite state machine used while parsing a node from a stream.
 */
enum XmlParseState {
    psNone,       ///< between tokens; the next character decides what follows
    psElement,    ///< inside a tag, reading its name or looking for attributes
    psAttribute,  ///< reading an attribute name
    psAttrValue,  ///< inside a double-quoted attribute value
    psComment,    ///< inside a comment, i.e. after "<!--"
    psDocType,    ///< inside a "<!DOCTYPE" declaration
    psText        ///< reading character data of a text node
};
00038 
00042 XmlNode::XmlNode()
00043 {
00044         type = ntUndefined;
00045         complete = false;
00046 }
00047 
00054 XmlNode::XmlNode(const NodeType type, const string& name)
00055 {
00056         this->type = type;
00057         this->name = name;
00058         this->complete = true;
00059 }
00060 
00064 XmlNode::~XmlNode()
00065 {       
00066         freeSubTree();
00067 }
00068 
00069 void XmlNode::freeSubTree() {
00070         vector<XmlNode*>::iterator it;
00071         for (it = children.begin(); it != children.end(); it++) {
00072                 delete *it;
00073         }
00074         children.clear();
00075 }
00076 
00085 void XmlNode::readFromStream(istream& readFrom, const bool readChildren) {
00086         if (((void*) readFrom) == NULL) throw eBadStream;
00087         if (!readFrom.good()) throw eBadStream;
00088         
00089         const char* whitespaces = "\t\r\n ";
00090         
00091         char prompt;
00092         
00093         XmlParseState state = psNone; // parsing is modelled as a finite state machine
00094         unsigned char c;
00095         bool tagFinished = false;
00096         bool bom = false; // true if just parsed UTF-8 byte order mark
00097         bool docTypeString = false;
00098         
00099         stack<XmlNode*> parents; // if we have to decend, remember parent nodes on a stack
00100         XmlNode* cnode = this; // current node
00101         
00102         string attrName, attrValue;
00103         
00104         readFrom.setf(ios::skipws);
00105         
00106         try {
00107                 while (!readFrom.eof()) {
00108                         readFrom >> c;
00109                         readFrom.unsetf(ios::skipws);
00110                         
00111                         switch (state) {
00112                                 
00113                                 // *** psElement ************************************************
00114                                 case psElement: {
00115                                         
00116                                         switch (c) {
00117                                                 
00118                                                 case '/': {
00119                                                         readFrom >> c;
00120                                                         if ('>' == c) {
00121                                                                 complete = true;
00122                                                                 state = psNone;
00123                                                                 tagFinished = true;
00124                                                         } else {
00125                                                                 throw eXmlParseError;
00126                                                         }
00127                                                         break;
00128                                                 }
00129                                                 
00130                                                 case '?': {
00131                                                         readFrom >> c;
00132                                                         if ('>' == c) {
00133                                                                 state = psNone;
00134                                                                 tagFinished = true;
00135                                                         } else {
00136                                                                 throw eXmlParseError;
00137                                                         }
00138                                                         break;
00139                                                 }
00140                                                 
00141                                                 case '>': {
00142                                                         state = psNone;
00143                                                         tagFinished = true;
00144                                                         break;
00145                                                 }
00146                                                 
00147                                                 case '\t':
00148                                                 case '\n':
00149                                                 case '\r':
00150                                                 case ' ': {
00151                                                         readFrom.setf(ios::skipws);
00152                                                         readFrom >> c;
00153                                                         readFrom.unsetf(ios::skipws);
00154                                                         
00155                                                         switch (c) {
00156                                                                 case '/': {
00157                                                                         readFrom >> c;
00158                                                                         if ('>' == c) {
00159                                                                                 cnode->complete = true;
00160                                                                                 state = psNone;
00161                                                                                 tagFinished = true;
00162                                                                         } else {
00163                                                                                 throw eXmlParseError;
00164                                                                         }
00165                                                                         break;
00166                                                                 }
00167                                                                 case '>': {
00168                                                                         state = psNone;
00169                                                                         tagFinished = true;
00170                                                                         cout << "Tag: " << cnode->name;
00171                                                                         cin >> prompt;
00172                                                                         break;
00173                                                                 }
00174                                                                 default: {
00175                                                                         attrName = c;
00176                                                                         state = psAttribute;
00177                                                                 }
00178                                                         }
00179                                                         break;
00180                                                 }
00181                                                 
00182                                                 default: {
00183                                                         if ((c >= 'a' && c <= 'z') ||
00184                                                                 (c >= 'A' && c <= 'Z') ||
00185                                                                 (c >= '0' && c <= '9') ||
00186                                                                 (c == ':') || (c == '-') || (c == '_'))
00187                                                         {
00188                                                                 cnode->name += c;
00189                                                         } else {
00190                                                                 throw eXmlParseError;
00191                                                         }
00192                                                 }
00193                                         }
00194                                         break;
00195                                 }
00196                                 
00197                                 // *** psAttribute **********************************************
00198                                 case psAttribute: {
00199                                         
00200                                         switch (c) {
00201                                                 
00202                                                 case '=': {
00203                                                         readFrom.setf(ios::skipws);
00204                                                         readFrom >> c;
00205                                                         readFrom.unsetf(ios::skipws);
00206                                                         
00207                                                         if ('"' == c) {
00208                                                                 state = psAttrValue;
00209                                                         } else {
00210                                                                 throw eXmlParseError;
00211                                                         }
00212                                                         break;
00213                                                 }
00214                                                 
00215                                                 case '\t':
00216                                                 case '\n':
00217                                                 case '\r':
00218                                                 case ' ': {
00219                                                         readFrom.setf(ios::skipws);
00220                                                         readFrom >> c;
00221                                                         
00222                                                         if ('=' == c) {
00223                                                                 readFrom >> c;
00224                                                                 
00225                                                                 if ('"' == c) {
00226                                                                         state = psAttrValue;
00227                                                                 } else {
00228                                                                         throw eXmlParseError;
00229                                                                 }
00230                                                         } else {
00231                                                                 throw eXmlParseError;
00232                                                         }
00233                                                         readFrom.unsetf(ios::skipws);
00234                                                         break;
00235                                                 }
00236                                                 
00237                                                 default: {
00238                                                         if ((c >= 'a' && c <= 'z') ||
00239                                                                 (c >= 'A' && c <= 'Z') ||
00240                                                                 (c >= '0' && c <= '9') ||
00241                                                                 (c == ':') || (c == '-') || (c == '_'))
00242                                                         {
00243                                                                 attrName += c;
00244                                                         } else {
00245                                                                 throw eXmlParseError;
00246                                                         }
00247                                                 }
00248                                         }
00249                                         break;
00250                                 }
00251                                 
00252                                 // *** psAttrValue **********************************************
00253                                 case psAttrValue: {
00254                                         
00255                                         switch (c) {
00256                                                 
00257                                                 case '"': {
00258                                                         // end of value
00259                                                         state = psElement;
00260                                                         cnode->attributes[attrName] = attrValue;
00261                                                         attrName = "";
00262                                                         attrValue = "";
00263                                                         break;
00264                                                 }
00265                                                 
00266                                                 default: {
00267                                                         attrValue += c;
00268                                                 }
00269                                         }
00270                                         break;
00271                                 }
00272                                 
00273                                 // *** psComment ************************************************
00274                                 case psComment: {
00275                                         
00276                                         switch (c) {
00277                                                 
00278                                                 case '-': {
00279                                                         readFrom >> c;
00280                                                         if ('-' == c) {
00281                                                                 readFrom >> c;
00282                                                                 if ('>' == c) {
00283                                                                         // end of comment
00284                                                                         
00285                                                                         state = psNone;
00286                                                                         cnode->complete = true;
00287                                                                         tagFinished = true;
00288                                                                 } else {
00289                                                                         cnode->name += "--" + c;
00290                                                                 }
00291                                                         } else {
00292                                                                 cnode->name += '-' + c;
00293                                                         }
00294                                                         break;
00295                                                 }
00296                                                 
00297                                                 default: {
00298                                                         cnode->name += c;
00299                                                 }
00300                                         }
00301                                         break;
00302                                 }
00303                                         
00304                                 // *** psText ***************************************************
00305                                 case psText: {
00306                                         
00307                                         switch (c) {
00308                                                 
00309                                                 case '<': {
00310                                                         // end of text node
00311                                                         // erase trailing white spaces
00312                                                         string::size_type ws_start = cnode->name.find_last_not_of(whitespaces);
00313                                                         cnode->name.erase(ws_start + 1);
00314                                                         
00315                                                         readFrom.unget();
00316                                                         state = psNone;
00317                                                         cnode->complete = true;
00318                                                         tagFinished = true;
00319                                                         break;
00320                                                 }
00321                                                 
00322                                                 default: {
00323                                                         cnode->name += c;
00324                                                 }
00325                                         }
00326                                         break;
00327                                 }
00328                                 
00329                                 // *** psDocType ************************************************
00330                                 case psDocType: {
00331                                                 
00332                                         switch (c) {
00333                                                 case '>': {
00334                                                         if (!docTypeString) {
00335                                                                 state = psNone;
00336                                                                 cnode->complete = true;
00337                                                                 tagFinished = true;
00338                                                         }
00339                                                         break;
00340                                                 }
00341                                                 case '"': {
00342                                                         docTypeString = !docTypeString;
00343                                                         cnode->name += c;
00344                                                         break;
00345                                                 }
00346                                                 default: {
00347                                                         cnode->name += c;
00348                                                 }
00349                                         }
00350                                         
00351                                         break;
00352                                 }
00353                                         
00354                                 // *** psNone ***************************************************
00355                                 case psNone: {
00356                                         
00357                                         switch (c) {
00358                                 
00359                                                 // element
00360                                                 case '<': {
00361                                                         assert(cnode->type == ntUndefined);
00362                                                         
00363                                                         readFrom >> c;
00364                                                         switch (c) {
00365                                                                 case '!': {
00366                                                                         readFrom >> c; 
00367                                                                         if ('-' == c) {
00368                                                                                 readFrom >> c; if ('-' != c) throw eXmlParseError;
00369                                                                                 cnode->type = ntCommentNode;
00370                                                                                 state = psComment;
00371                                                                                 
00372                                                                         } else {
00373                                                                                 if (c != 'D') throw eXmlParseError;
00374                                                                                 readFrom >> c; if (c != 'O') throw eXmlParseError;
00375                                                                                 readFrom >> c; if (c != 'C') throw eXmlParseError;
00376                                                                                 readFrom >> c; if (c != 'T') throw eXmlParseError;
00377                                                                                 readFrom >> c; if (c != 'Y') throw eXmlParseError;
00378                                                                                 readFrom >> c; if (c != 'P') throw eXmlParseError;
00379                                                                                 readFrom >> c; if (c != 'E') throw eXmlParseError;
00380                                                                                 
00381                                                                                 cnode->type = ntDocTypeNode;
00382                                                                                 state = psDocType;
00383                                                                                 readFrom.setf(ios::skipws);
00384                                                                         }
00385                                                                         break;
00386                                                                 }
00387                                                                 case '?': {
00388                                                                         cnode->type = ntDocumentNode;
00389                                                                         state = psElement;
00390                                                                         break;
00391                                                                 }
00392                                                                 case '/': {
00393                                                                         // close tag                                                            
00394                                                                         // read name
00395                                                                         string closeTagName;
00396                                                                         readFrom >> c;
00397                                                                         while (!readFrom.eof() && '>' != c) {
00398                                                                                 if ((c >= 'a' && c <= 'z') ||
00399                                                                                         (c >= 'A' && c <= 'Z') ||
00400                                                                                         (c >= '0' && c <= '9') ||
00401                                                                                         (c == ':') || (c == '-') || (c == '_'))
00402                                                                                 {
00403                                                                                         closeTagName += c;
00404                                                                                 } else {
00405                                                                                         throw eXmlParseError;
00406                                                                                 }
00407                                                                                 readFrom >> c;
00408                                                                         }
00409                                                                         
00410                                                                         if (parents.size() == 0) throw eUnexpectedCloseTag;
00411                                                                         XmlNode* pnode = parents.top();
00412                                                                         parents.pop();
00413                                                                         if (pnode->name != closeTagName) {
00414                                                                                 throw eUnexpectedCloseTag;
00415                                                                                 
00416                                                                         } else {
00417                                                                                 delete cnode; // this was an empty node struct
00418                                                                                 cnode = pnode;
00419                                                                                 cnode->complete = true; // this node will be linked to its parent below
00420                                                                                 state = psNone; 
00421                                                                                 tagFinished = true;
00422                                                                         }
00423                                                                         break;
00424                                                                 }
00425                                                                 default: {
00426                                                                         readFrom.unget();
00427                                                                         cnode->type = ntElementNode;
00428                                                                         state = psElement;
00429                                                                 }
00430                                                         }
00431                                                         break;
00432                                                 }
00433                                                 
00434                                                 // UTF-8 byte order mark
00435                                                 case 0xEF: {
00436                                                         readFrom >> c; if (c != 0xBB) throw eUtf8BomError;
00437                                                         readFrom >> c; if (c != 0xBF) throw eUtf8BomError;
00438                                                         bom = true;
00439                                                         break;
00440                                                 }
00441                                                 
00442                                                 default: {
00443                                                         cnode->type = ntTextNode;
00444                                                         cnode->name += c;
00445                                                         state = psText;
00446                                                 }
00447                                                 
00448                                         }
00449                                         break;
00450                                         
00451                                 } // psNone;
00452                                 
00453                                 default: {
00454                                         assert(false);
00455                                 }
00456                                 
00457                         } // switch (state)
00458                         
00459                         if (psNone == state) {
00460                                 if (!(tagFinished || bom)) throw eXmlParseError;
00461                                         
00462                                 if (bom) {
00463                                         bom = false;
00464                                         
00465                                 } else if (cnode->complete) {
00466                                         // check, if we can ascend
00467                                         if (parents.size() > 0) {
00468                                                 // yep
00469                                                 XmlNode* pnode = parents.top();
00470 //                                              DEBUG(DBG_XML, DBGL_INFO, "Ascending to %s\n", pnode->name.c_str());
00471                                                 assert(!pnode->complete);
00472                                                 // link node to parent
00473                                                 pnode->children.push_back(cnode);
00474                                                 cnode = new XmlNode(); // perhaps we get further children
00475                                                 
00476                                         } else {
00477                                                 // nope, so we are finished
00478                                                 break;
00479                                         }
00480                                         
00481                                 } else {
00482                                         // we probably get children ;-)
00483                                         if (readChildren) {
00484 //                                              DEBUG(DBG_XML, DBGL_INFO, "Descending from %s\n", cnode->name.c_str());
00485                                                 parents.push(cnode);
00486                                                 cnode = new XmlNode();
00487                                                 
00488                                         } else {
00489                                                 break;
00490                                         }
00491                                 }
00492                                 
00493                                 readFrom.setf(ios::skipws);
00494                         }
00495                         
00496                 } // while (!readFrom.eof());
00497                 
00498                 if (parents.size() == 1) {
00499                         cnode = parents.top();
00500                         parents.pop();
00501                         if (cnode->type == ntDocumentNode) {
00502                                 // this is ok
00503                                 cnode->complete = true;
00504                                 
00505                         } else {
00506                                 throw eUnexpectedEof;
00507                         }
00508                         
00509                 } else if (parents.size() > 1) {
00510                         throw eUnexpectedEof;
00511                 }
00512                 
00513         } catch (sxml::Exception e) {
00514                 // clean up
00515                 
00516                 while (parents.size() > 0) {
00517                         XmlNode* n = parents.top();
00518                         if (n != this) {
00519                                 if (n == cnode) cnode = NULL;
00520                                 delete n;
00521                         }
00522                         parents.pop();
00523                 }
00524                 
00525                 if (cnode != this && cnode != NULL) {
00526                         delete cnode; // we are not yet attached to a parent!
00527                 }
00528                 
00529                 throw;
00530         }
00531         
00532         assert(cnode == this);
00533 }
00534 
00542 void XmlNode::writeToStream(ostream& writeTo, const bool pretty) {
00543         if (!writeTo.good()) throw eBadStream;
00544         
00545         int indent = 0;
00546         
00547         typedef struct {
00548                 XmlNode* node;
00549                 int childindex;
00550         } ParentIndex;
00551         
00552         stack<XmlNode*> parents; // if we have to decend, remember parent nodes on a stack
00553         stack<int> childIndices; // ... and the current child index
00554         
00555         XmlNode* cnode = this; // current node
00556         int childIndex = 0;
00557         
00558         while (writeTo.good()) {
00559                 if (!cnode->complete) throw eNodeIncomplete;
00560                 
00561                 if (pretty) {
00562                         for (int i = 0; i < indent; i++) {
00563                                 writeTo << '\t';
00564                         }
00565                 }
00566                 
00567                 switch (cnode->type) {
00568                         
00569                         case ntElementNode: {
00570                                 // tag
00571                                 writeTo << '<' << cnode->name;
00572                                 
00573                                 // attributes
00574                                 map<string, string>::iterator it;
00575                                 for (it = cnode->attributes.begin(); it != cnode->attributes.end(); it++) {
00576                                         if (it->second != "") {
00577                                                 writeTo << ' ' << it->first << "=\"" << it->second << '"';
00578                                         }
00579                                 }
00580                                 
00581                                 // closing brace
00582                                 if (childIndex < (int) cnode->children.size()) {
00583                                         writeTo << '>';
00584                                 } else {
00585                                         writeTo << " />";
00586                                 }
00587                                 
00588                                 break;
00589                         }
00590                         
00591                         case ntTextNode: {
00592                                 writeTo << cnode->name;
00593                                 break;
00594                         }
00595                         
00596                         case ntCommentNode: {
00597                                 writeTo << "<!-- " << cnode->name << " -->";
00598                                 break;
00599                         }
00600                         
00601                         case ntDocumentNode: {
00602                                 // tag
00603                                 writeTo << "<?" << cnode->name;
00604                                 
00605                                 // attributes (version and encoding at first, rest afterwards)
00606                                 map<string, string>::iterator it;
00607                                 it = cnode->attributes.find("version");
00608                                 if (it != cnode->attributes.end() && !it->second.empty()) {
00609                                         writeTo << ' ' << it->first << "=\"" << it->second << '"';
00610                                 }
00611                                 it = cnode->attributes.find("encoding");
00612                                 if (it != cnode->attributes.end() && !it->second.empty()) {
00613                                         writeTo << ' ' << it->first << "=\"" << it->second << '"';
00614                                 }
00615                                 for (it = cnode->attributes.begin(); it != cnode->attributes.end(); it++) {
00616                                         if (!it->second.empty() && it->first != "version" && it->first != "encoding") {
00617                                                 writeTo << ' ' << it->first << "=\"" << it->second << '"';
00618                                         }
00619                                 }
00620                                 
00621                                 // closing brace
00622                                 writeTo << "?>";
00623                                 break;
00624                         }
00625                         
00626                         case ntDocTypeNode: {
00627                                 // tag
00628                                 writeTo << "<!DOCTYPE " << cnode->name << '>';
00629                                 break;
00630                         }
00631                         
00632                         default: {
00633                                 throw eUnknownNodeType;
00634                         }
00635                         
00636                 } // switch
00637                 
00638                 if (pretty) writeTo << endl;
00639                 
00640                 if (childIndex < (int) cnode->children.size()) {
00641                         if (cnode->type != ntDocumentNode) indent++;
00642                         
00643                         // descend
00644                         parents.push(cnode);
00645                         childIndices.push(childIndex);
00646                         cnode = cnode->children[childIndex];
00647                         childIndex = 0;
00648                         
00649                 } else {
00650                         // ascend
00651                         while (parents.size() > 0) {
00652                                 cnode = parents.top();
00653                                 childIndex = childIndices.top();
00654                                 childIndices.pop();
00655                                 childIndex++;
00656                                 if (childIndex < (int) cnode->children.size()) {
00657                                         // there are still children left
00658                                         childIndices.push(childIndex);
00659                                         cnode = cnode->children[childIndex];
00660                                         childIndex = 0;
00661                                         break;
00662                                         
00663                                 } else {
00664                                         // no more children, write end-tag
00665                                         parents.pop();
00666                                         indent--;
00667                                         
00668                                         if (cnode->type == ntElementNode) {
00669                                                 if (pretty) {
00670                                                         for (int i = 0; i < indent; i++) {
00671                                                                 writeTo << '\t';
00672                                                         }
00673                                                 }
00674                                                 writeTo << "</" << cnode->name << '>';
00675                                                 if (pretty) writeTo << endl;
00676                                         }
00677                                 }
00678                         } // while
00679                 }
00680                 
00681                 if (parents.size() == 0) break;
00682                 
00683         } // while
00684 }
00685 
00699 NodeSearch* XmlNode::findInit(const string& name, const bool ignoreNamespaces) {
00700         NodeSearch* ns = new NodeSearch;
00701         ns->ignoreNamespaces = ignoreNamespaces;
00702         ns->owner = this;
00703         ns->name = name;
00704         ns->parents.push(this);
00705         ns->childIndices.push(-1);
00706         
00707         return ns;
00708 }
00709 
00721 XmlNode* XmlNode::findNext(NodeSearch* ns) {
00722         assert(ns != NULL && ns->owner == this);
00723         assert(ns->parents.size() > 0);
00724         assert(ns->childIndices.size() > 0);
00725         
00726         XmlNode* cnode;
00727         int childIndex;
00728         
00729         while (ns->parents.size() > 0) {
00730                 cnode = ns->parents.top();
00731                 childIndex = ns->childIndices.top();
00732                 if (childIndex == -1) {
00733                         childIndex++;
00734                         ns->childIndices.pop();
00735                         ns->childIndices.push(childIndex);
00736                         
00737                         if (ns->ignoreNamespaces) {
00738                                 string::size_type pos = cnode->name.find(":", 0);
00739                                 if (pos != string::npos) {
00740                                         // namespace given
00741                                         string s = cnode->name.substr(pos + 1, cnode->name.length() - pos - 1);
00742                                         if (s == ns->name) {
00743                                                 return cnode;
00744                                         }
00745                                         
00746                                 } else {
00747                                         if (cnode->name == ns->name) {
00748                                                 return cnode;
00749                                         }
00750                                 }
00751                                 
00752                         } else {
00753                                 if (cnode->name == ns->name) {
00754                                         return cnode;
00755                                 }
00756                         }
00757                         
00758                 } else {
00759                         if (childIndex < (int) cnode->children.size()) {
00760                                 // descend
00761                                 ns->parents.push(cnode->children[childIndex]);
00762                                 childIndex++;
00763                                 ns->childIndices.pop();
00764                                 ns->childIndices.push(childIndex);
00765                                 ns->childIndices.push(-1);
00766                                 
00767                         } else {
00768                                 // ascend
00769                                 ns->parents.pop();
00770                                 ns->childIndices.pop();
00771                         }
00772                 }
00773         }
00774         
00775         return NULL;
00776 }
00777 
00785 void XmlNode::findFree(NodeSearch* ns) {
00786         assert(ns != NULL && ns->owner == this);
00787         delete ns;
00788 }
00789 
00801 XmlNode* XmlNode::findFirst(const string& name, const bool ignoreNamespaces) {
00802         NodeSearch* ns = findInit(name, ignoreNamespaces);
00803         XmlNode* res = findNext(ns);
00804         findFree(ns);
00805         
00806         return res;
00807 }
00808 
00809 } // namespace sxml

Generated on Sun Dec 10 18:47:09 2006 for XmlNode by  doxygen 1.4.6