00001
00013 #include "xmlNode.h"
00014
00015
00016 #include <assert.h>
00017
00018 #include <istream>
00019 #include <stack>
00020
00021 namespace sxml
00022 {
00023
00024 using namespace std;
00025
/**
 * States of the character-driven parser in XmlNode::readFromStream().
 */
enum XmlParseState {
    psNone,        // between tokens: the next character decides what follows
    psElement,     // inside a tag, reading its name or looking for attributes
    psAttribute,   // reading an attribute name, up to the '=' and opening quote
    psAttrValue,   // reading a double-quoted attribute value
    psComment,     // inside a comment, after the opening "!--"
    psDocType,     // inside a DOCTYPE declaration
    psText         // reading character data up to the next '<'
};
00038
00042 XmlNode::XmlNode()
00043 {
00044 type = ntUndefined;
00045 complete = false;
00046 }
00047
00054 XmlNode::XmlNode(const NodeType type, const string& name)
00055 {
00056 this->type = type;
00057 this->name = name;
00058 this->complete = true;
00059 }
00060
00064 XmlNode::~XmlNode()
00065 {
00066 freeSubTree();
00067 }
00068
00069 void XmlNode::freeSubTree() {
00070 vector<XmlNode*>::iterator it;
00071 for (it = children.begin(); it != children.end(); it++) {
00072 delete *it;
00073 }
00074 children.clear();
00075 }
00076
00085 void XmlNode::readFromStream(istream& readFrom, const bool readChildren) {
00086 if (((void*) readFrom) == NULL) throw eBadStream;
00087 if (!readFrom.good()) throw eBadStream;
00088
00089 const char* whitespaces = "\t\r\n ";
00090
00091 char prompt;
00092
00093 XmlParseState state = psNone;
00094 unsigned char c;
00095 bool tagFinished = false;
00096 bool bom = false;
00097 bool docTypeString = false;
00098
00099 stack<XmlNode*> parents;
00100 XmlNode* cnode = this;
00101
00102 string attrName, attrValue;
00103
00104 readFrom.setf(ios::skipws);
00105
00106 try {
00107 while (!readFrom.eof()) {
00108 readFrom >> c;
00109 readFrom.unsetf(ios::skipws);
00110
00111 switch (state) {
00112
00113
00114 case psElement: {
00115
00116 switch (c) {
00117
00118 case '/': {
00119 readFrom >> c;
00120 if ('>' == c) {
00121 complete = true;
00122 state = psNone;
00123 tagFinished = true;
00124 } else {
00125 throw eXmlParseError;
00126 }
00127 break;
00128 }
00129
00130 case '?': {
00131 readFrom >> c;
00132 if ('>' == c) {
00133 state = psNone;
00134 tagFinished = true;
00135 } else {
00136 throw eXmlParseError;
00137 }
00138 break;
00139 }
00140
00141 case '>': {
00142 state = psNone;
00143 tagFinished = true;
00144 break;
00145 }
00146
00147 case '\t':
00148 case '\n':
00149 case '\r':
00150 case ' ': {
00151 readFrom.setf(ios::skipws);
00152 readFrom >> c;
00153 readFrom.unsetf(ios::skipws);
00154
00155 switch (c) {
00156 case '/': {
00157 readFrom >> c;
00158 if ('>' == c) {
00159 cnode->complete = true;
00160 state = psNone;
00161 tagFinished = true;
00162 } else {
00163 throw eXmlParseError;
00164 }
00165 break;
00166 }
00167 case '>': {
00168 state = psNone;
00169 tagFinished = true;
00170 cout << "Tag: " << cnode->name;
00171 cin >> prompt;
00172 break;
00173 }
00174 default: {
00175 attrName = c;
00176 state = psAttribute;
00177 }
00178 }
00179 break;
00180 }
00181
00182 default: {
00183 if ((c >= 'a' && c <= 'z') ||
00184 (c >= 'A' && c <= 'Z') ||
00185 (c >= '0' && c <= '9') ||
00186 (c == ':') || (c == '-') || (c == '_'))
00187 {
00188 cnode->name += c;
00189 } else {
00190 throw eXmlParseError;
00191 }
00192 }
00193 }
00194 break;
00195 }
00196
00197
00198 case psAttribute: {
00199
00200 switch (c) {
00201
00202 case '=': {
00203 readFrom.setf(ios::skipws);
00204 readFrom >> c;
00205 readFrom.unsetf(ios::skipws);
00206
00207 if ('"' == c) {
00208 state = psAttrValue;
00209 } else {
00210 throw eXmlParseError;
00211 }
00212 break;
00213 }
00214
00215 case '\t':
00216 case '\n':
00217 case '\r':
00218 case ' ': {
00219 readFrom.setf(ios::skipws);
00220 readFrom >> c;
00221
00222 if ('=' == c) {
00223 readFrom >> c;
00224
00225 if ('"' == c) {
00226 state = psAttrValue;
00227 } else {
00228 throw eXmlParseError;
00229 }
00230 } else {
00231 throw eXmlParseError;
00232 }
00233 readFrom.unsetf(ios::skipws);
00234 break;
00235 }
00236
00237 default: {
00238 if ((c >= 'a' && c <= 'z') ||
00239 (c >= 'A' && c <= 'Z') ||
00240 (c >= '0' && c <= '9') ||
00241 (c == ':') || (c == '-') || (c == '_'))
00242 {
00243 attrName += c;
00244 } else {
00245 throw eXmlParseError;
00246 }
00247 }
00248 }
00249 break;
00250 }
00251
00252
00253 case psAttrValue: {
00254
00255 switch (c) {
00256
00257 case '"': {
00258
00259 state = psElement;
00260 cnode->attributes[attrName] = attrValue;
00261 attrName = "";
00262 attrValue = "";
00263 break;
00264 }
00265
00266 default: {
00267 attrValue += c;
00268 }
00269 }
00270 break;
00271 }
00272
00273
00274 case psComment: {
00275
00276 switch (c) {
00277
00278 case '-': {
00279 readFrom >> c;
00280 if ('-' == c) {
00281 readFrom >> c;
00282 if ('>' == c) {
00283
00284
00285 state = psNone;
00286 cnode->complete = true;
00287 tagFinished = true;
00288 } else {
00289 cnode->name += "--" + c;
00290 }
00291 } else {
00292 cnode->name += '-' + c;
00293 }
00294 break;
00295 }
00296
00297 default: {
00298 cnode->name += c;
00299 }
00300 }
00301 break;
00302 }
00303
00304
00305 case psText: {
00306
00307 switch (c) {
00308
00309 case '<': {
00310
00311
00312 string::size_type ws_start = cnode->name.find_last_not_of(whitespaces);
00313 cnode->name.erase(ws_start + 1);
00314
00315 readFrom.unget();
00316 state = psNone;
00317 cnode->complete = true;
00318 tagFinished = true;
00319 break;
00320 }
00321
00322 default: {
00323 cnode->name += c;
00324 }
00325 }
00326 break;
00327 }
00328
00329
00330 case psDocType: {
00331
00332 switch (c) {
00333 case '>': {
00334 if (!docTypeString) {
00335 state = psNone;
00336 cnode->complete = true;
00337 tagFinished = true;
00338 }
00339 break;
00340 }
00341 case '"': {
00342 docTypeString = !docTypeString;
00343 cnode->name += c;
00344 break;
00345 }
00346 default: {
00347 cnode->name += c;
00348 }
00349 }
00350
00351 break;
00352 }
00353
00354
00355 case psNone: {
00356
00357 switch (c) {
00358
00359
00360 case '<': {
00361 assert(cnode->type == ntUndefined);
00362
00363 readFrom >> c;
00364 switch (c) {
00365 case '!': {
00366 readFrom >> c;
00367 if ('-' == c) {
00368 readFrom >> c; if ('-' != c) throw eXmlParseError;
00369 cnode->type = ntCommentNode;
00370 state = psComment;
00371
00372 } else {
00373 if (c != 'D') throw eXmlParseError;
00374 readFrom >> c; if (c != 'O') throw eXmlParseError;
00375 readFrom >> c; if (c != 'C') throw eXmlParseError;
00376 readFrom >> c; if (c != 'T') throw eXmlParseError;
00377 readFrom >> c; if (c != 'Y') throw eXmlParseError;
00378 readFrom >> c; if (c != 'P') throw eXmlParseError;
00379 readFrom >> c; if (c != 'E') throw eXmlParseError;
00380
00381 cnode->type = ntDocTypeNode;
00382 state = psDocType;
00383 readFrom.setf(ios::skipws);
00384 }
00385 break;
00386 }
00387 case '?': {
00388 cnode->type = ntDocumentNode;
00389 state = psElement;
00390 break;
00391 }
00392 case '/': {
00393
00394
00395 string closeTagName;
00396 readFrom >> c;
00397 while (!readFrom.eof() && '>' != c) {
00398 if ((c >= 'a' && c <= 'z') ||
00399 (c >= 'A' && c <= 'Z') ||
00400 (c >= '0' && c <= '9') ||
00401 (c == ':') || (c == '-') || (c == '_'))
00402 {
00403 closeTagName += c;
00404 } else {
00405 throw eXmlParseError;
00406 }
00407 readFrom >> c;
00408 }
00409
00410 if (parents.size() == 0) throw eUnexpectedCloseTag;
00411 XmlNode* pnode = parents.top();
00412 parents.pop();
00413 if (pnode->name != closeTagName) {
00414 throw eUnexpectedCloseTag;
00415
00416 } else {
00417 delete cnode;
00418 cnode = pnode;
00419 cnode->complete = true;
00420 state = psNone;
00421 tagFinished = true;
00422 }
00423 break;
00424 }
00425 default: {
00426 readFrom.unget();
00427 cnode->type = ntElementNode;
00428 state = psElement;
00429 }
00430 }
00431 break;
00432 }
00433
00434
00435 case 0xEF: {
00436 readFrom >> c; if (c != 0xBB) throw eUtf8BomError;
00437 readFrom >> c; if (c != 0xBF) throw eUtf8BomError;
00438 bom = true;
00439 break;
00440 }
00441
00442 default: {
00443 cnode->type = ntTextNode;
00444 cnode->name += c;
00445 state = psText;
00446 }
00447
00448 }
00449 break;
00450
00451 }
00452
00453 default: {
00454 assert(false);
00455 }
00456
00457 }
00458
00459 if (psNone == state) {
00460 if (!(tagFinished || bom)) throw eXmlParseError;
00461
00462 if (bom) {
00463 bom = false;
00464
00465 } else if (cnode->complete) {
00466
00467 if (parents.size() > 0) {
00468
00469 XmlNode* pnode = parents.top();
00470
00471 assert(!pnode->complete);
00472
00473 pnode->children.push_back(cnode);
00474 cnode = new XmlNode();
00475
00476 } else {
00477
00478 break;
00479 }
00480
00481 } else {
00482
00483 if (readChildren) {
00484
00485 parents.push(cnode);
00486 cnode = new XmlNode();
00487
00488 } else {
00489 break;
00490 }
00491 }
00492
00493 readFrom.setf(ios::skipws);
00494 }
00495
00496 }
00497
00498 if (parents.size() == 1) {
00499 cnode = parents.top();
00500 parents.pop();
00501 if (cnode->type == ntDocumentNode) {
00502
00503 cnode->complete = true;
00504
00505 } else {
00506 throw eUnexpectedEof;
00507 }
00508
00509 } else if (parents.size() > 1) {
00510 throw eUnexpectedEof;
00511 }
00512
00513 } catch (sxml::Exception e) {
00514
00515
00516 while (parents.size() > 0) {
00517 XmlNode* n = parents.top();
00518 if (n != this) {
00519 if (n == cnode) cnode = NULL;
00520 delete n;
00521 }
00522 parents.pop();
00523 }
00524
00525 if (cnode != this && cnode != NULL) {
00526 delete cnode;
00527 }
00528
00529 throw;
00530 }
00531
00532 assert(cnode == this);
00533 }
00534
00542 void XmlNode::writeToStream(ostream& writeTo, const bool pretty) {
00543 if (!writeTo.good()) throw eBadStream;
00544
00545 int indent = 0;
00546
00547 typedef struct {
00548 XmlNode* node;
00549 int childindex;
00550 } ParentIndex;
00551
00552 stack<XmlNode*> parents;
00553 stack<int> childIndices;
00554
00555 XmlNode* cnode = this;
00556 int childIndex = 0;
00557
00558 while (writeTo.good()) {
00559 if (!cnode->complete) throw eNodeIncomplete;
00560
00561 if (pretty) {
00562 for (int i = 0; i < indent; i++) {
00563 writeTo << '\t';
00564 }
00565 }
00566
00567 switch (cnode->type) {
00568
00569 case ntElementNode: {
00570
00571 writeTo << '<' << cnode->name;
00572
00573
00574 map<string, string>::iterator it;
00575 for (it = cnode->attributes.begin(); it != cnode->attributes.end(); it++) {
00576 if (it->second != "") {
00577 writeTo << ' ' << it->first << "=\"" << it->second << '"';
00578 }
00579 }
00580
00581
00582 if (childIndex < (int) cnode->children.size()) {
00583 writeTo << '>';
00584 } else {
00585 writeTo << " />";
00586 }
00587
00588 break;
00589 }
00590
00591 case ntTextNode: {
00592 writeTo << cnode->name;
00593 break;
00594 }
00595
00596 case ntCommentNode: {
00597 writeTo << "<!-- " << cnode->name << " -->";
00598 break;
00599 }
00600
00601 case ntDocumentNode: {
00602
00603 writeTo << "<?" << cnode->name;
00604
00605
00606 map<string, string>::iterator it;
00607 it = cnode->attributes.find("version");
00608 if (it != cnode->attributes.end() && !it->second.empty()) {
00609 writeTo << ' ' << it->first << "=\"" << it->second << '"';
00610 }
00611 it = cnode->attributes.find("encoding");
00612 if (it != cnode->attributes.end() && !it->second.empty()) {
00613 writeTo << ' ' << it->first << "=\"" << it->second << '"';
00614 }
00615 for (it = cnode->attributes.begin(); it != cnode->attributes.end(); it++) {
00616 if (!it->second.empty() && it->first != "version" && it->first != "encoding") {
00617 writeTo << ' ' << it->first << "=\"" << it->second << '"';
00618 }
00619 }
00620
00621
00622 writeTo << "?>";
00623 break;
00624 }
00625
00626 case ntDocTypeNode: {
00627
00628 writeTo << "<!DOCTYPE " << cnode->name << '>';
00629 break;
00630 }
00631
00632 default: {
00633 throw eUnknownNodeType;
00634 }
00635
00636 }
00637
00638 if (pretty) writeTo << endl;
00639
00640 if (childIndex < (int) cnode->children.size()) {
00641 if (cnode->type != ntDocumentNode) indent++;
00642
00643
00644 parents.push(cnode);
00645 childIndices.push(childIndex);
00646 cnode = cnode->children[childIndex];
00647 childIndex = 0;
00648
00649 } else {
00650
00651 while (parents.size() > 0) {
00652 cnode = parents.top();
00653 childIndex = childIndices.top();
00654 childIndices.pop();
00655 childIndex++;
00656 if (childIndex < (int) cnode->children.size()) {
00657
00658 childIndices.push(childIndex);
00659 cnode = cnode->children[childIndex];
00660 childIndex = 0;
00661 break;
00662
00663 } else {
00664
00665 parents.pop();
00666 indent--;
00667
00668 if (cnode->type == ntElementNode) {
00669 if (pretty) {
00670 for (int i = 0; i < indent; i++) {
00671 writeTo << '\t';
00672 }
00673 }
00674 writeTo << "</" << cnode->name << '>';
00675 if (pretty) writeTo << endl;
00676 }
00677 }
00678 }
00679 }
00680
00681 if (parents.size() == 0) break;
00682
00683 }
00684 }
00685
00699 NodeSearch* XmlNode::findInit(const string& name, const bool ignoreNamespaces) {
00700 NodeSearch* ns = new NodeSearch;
00701 ns->ignoreNamespaces = ignoreNamespaces;
00702 ns->owner = this;
00703 ns->name = name;
00704 ns->parents.push(this);
00705 ns->childIndices.push(-1);
00706
00707 return ns;
00708 }
00709
00721 XmlNode* XmlNode::findNext(NodeSearch* ns) {
00722 assert(ns != NULL && ns->owner == this);
00723 assert(ns->parents.size() > 0);
00724 assert(ns->childIndices.size() > 0);
00725
00726 XmlNode* cnode;
00727 int childIndex;
00728
00729 while (ns->parents.size() > 0) {
00730 cnode = ns->parents.top();
00731 childIndex = ns->childIndices.top();
00732 if (childIndex == -1) {
00733 childIndex++;
00734 ns->childIndices.pop();
00735 ns->childIndices.push(childIndex);
00736
00737 if (ns->ignoreNamespaces) {
00738 string::size_type pos = cnode->name.find(":", 0);
00739 if (pos != string::npos) {
00740
00741 string s = cnode->name.substr(pos + 1, cnode->name.length() - pos - 1);
00742 if (s == ns->name) {
00743 return cnode;
00744 }
00745
00746 } else {
00747 if (cnode->name == ns->name) {
00748 return cnode;
00749 }
00750 }
00751
00752 } else {
00753 if (cnode->name == ns->name) {
00754 return cnode;
00755 }
00756 }
00757
00758 } else {
00759 if (childIndex < (int) cnode->children.size()) {
00760
00761 ns->parents.push(cnode->children[childIndex]);
00762 childIndex++;
00763 ns->childIndices.pop();
00764 ns->childIndices.push(childIndex);
00765 ns->childIndices.push(-1);
00766
00767 } else {
00768
00769 ns->parents.pop();
00770 ns->childIndices.pop();
00771 }
00772 }
00773 }
00774
00775 return NULL;
00776 }
00777
00785 void XmlNode::findFree(NodeSearch* ns) {
00786 assert(ns != NULL && ns->owner == this);
00787 delete ns;
00788 }
00789
00801 XmlNode* XmlNode::findFirst(const string& name, const bool ignoreNamespaces) {
00802 NodeSearch* ns = findInit(name, ignoreNamespaces);
00803 XmlNode* res = findNext(ns);
00804 findFree(ns);
00805
00806 return res;
00807 }
00808
00809 }