c# - Build XPath for node from XmlReader -


I am writing an application that parses dynamic XML from various sources, traverses the XML, and returns the unique elements.

Given the large size of the XML files, I am using XmlReader to parse the XML structure because of memory constraints.

/// <summary>
/// Streams the XML document at <paramref name="filePath"/> and collects each
/// distinct element local name together with the depth at which that name was
/// first encountered.
/// </summary>
/// <param name="filePath">Path or URI of the XML document, passed to <see cref="XmlReader.Create(string)"/>.</param>
/// <returns>A table mapping element local name to the depth of its first occurrence.</returns>
public IDictionary<string, int> Discover(string filePath)
{
    // The values are reader.Depth (an int), so the table must be <string, int>
    // to match both the Add call and the declared return type.
    var nodeTable = new Dictionary<string, int>();

    using (XmlReader reader = XmlReader.Create(filePath))
    {
        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            // Only the first occurrence of a name is recorded.
            if (!nodeTable.ContainsKey(reader.LocalName))
            {
                nodeTable.Add(reader.LocalName, reader.Depth);
            }
        }
    }

    Debug.WriteLine("the node table has {0} items.", nodeTable.Count);
    return nodeTable;
}

This works a treat and is nice and performant, but the final piece of the puzzle eludes me: I am trying to generate an XPath for each element.

Now, at first this seemed straightforward using the following.

// Names of the elements that are currently open, innermost on top.
// `reader` (an XmlReader) and `path` (a string) are not declared in this
// snippet; they are expected to come from the surrounding code.
var elements = new Stack<string>();

while (reader.Read())
{
    switch (reader.NodeType)
    {
        case XmlNodeType.Element:
            // A self-closing element (<x/>) never produces an EndElement node,
            // so pushing it would leave a stale entry on the stack and corrupt
            // every path computed afterwards.
            if (!reader.IsEmptyElement)
            {
                elements.Push(reader.LocalName);
            }
            break;

        case XmlNodeType.EndElement:
            elements.Pop();
            break;

        case XmlNodeType.Text:
            // A Stack enumerates from the top down, so reverse it to get the
            // path in root-to-leaf order.
            path = string.Join("/", elements.Reverse());
            break;
    }
}

But this gives me only one part of the solution. I wish to return the XPath for every node in the tree that contains data, and to detect whether a given node in the tree contains nested collections of data.

i.e.

<customers>
  <customer id="2">
    <name>ted smith</name>
    <addresses>
      <address1>
        <line1></line1>
      </address1>
      <address2>
        <line1></line1>
        <line2></line2>
      </address2>
    </addresses>
  </customer>
  <customer id="322">
    <name>smith mcsmith</name>
    <addresses>
      <address1>
        <line1></line1>
        <line2></line2>
      </address1>
      <address2>
        <line1></line1>
        <line2></line2>
      </address2>
    </addresses>
  </customer>
</customers>

Keep in mind that the data is dynamic and the schema is unknown.

So the output should include:

/customer/name /customer/address1/line1 /customer/address1/line2 /customer/address2/line1 /customer/address2/line2 

I am using a recursive method rather than push/pop. See the code below.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.IO;

namespace ConsoleApplication1
{
    /// <summary>
    /// Builds an in-memory <see cref="Node"/> tree from an XML document by
    /// walking it recursively with an <see cref="XmlReader"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses the sample customers document into a <see cref="Node"/> tree.
        /// </summary>
        static void Main(string[] args)
        {
            string input =
                "<customers>" +
                  "<customer id=\"2\">" +
                    "<name>ted smith</name>" +
                    "<addresses>" +
                      "<address1>" +
                           "<line1></line1>" +
                      "</address1>" +
                      "<address2>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address2>" +
                    "</addresses>" +
                  "</customer>" +
                  "<customer id=\"322\">" +
                    "<name>smith mcsmith</name>" +
                    "<addresses>" +
                      "<address1>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address1>" +
                      "<address2>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address2>" +
                    "</addresses>" +
                  "</customer>" +
                "</customers>";

            // Both readers are disposable; release them once the tree is built.
            using (StringReader sReader = new StringReader(input))
            using (XmlReader reader = XmlReader.Create(sReader))
            {
                Node root = new Node();
                ReadNode(reader, root);
            }
        }

        /// <summary>
        /// Reads from <paramref name="reader"/> and fills in <paramref name="node"/>.
        /// If <paramref name="node"/> has no name yet, the first element read
        /// supplies its name and attributes; every element read after that is
        /// added as a child and populated recursively. Text nodes are stored in
        /// <see cref="Node.Text"/>. Reading stops at the first end tag seen at
        /// this level.
        /// </summary>
        /// <param name="reader">Reader to consume nodes from.</param>
        /// <param name="node">Node to populate.</param>
        /// <returns>
        /// <c>true</c> if reading stopped on an end tag; <c>false</c> if the
        /// reader ran out of input.
        /// </returns>
        static bool ReadNode(XmlReader reader, Node node)
        {
            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                    case XmlNodeType.Element:
                        if (node.Name.Length == 0)
                        {
                            // First element seen for an unnamed node (the root).
                            node.Name = reader.Name;
                            GetAttributes(reader, node);
                        }
                        else
                        {
                            Node newNode = new Node();
                            newNode.Name = reader.Name;
                            if (node.Children == null)
                            {
                                node.Children = new List<Node>();
                            }
                            node.Children.Add(newNode);

                            // Capture this before reading attributes: a
                            // self-closing element (<x/>) has no end tag, so
                            // recursing into it would consume its following
                            // siblings as if they were its children.
                            bool isEmpty = reader.IsEmptyElement;
                            GetAttributes(reader, newNode);
                            if (!isEmpty)
                            {
                                ReadNode(reader, newNode);
                            }
                        }
                        break;

                    case XmlNodeType.EndElement:
                        // The end tag closes the element this call is populating.
                        return true;

                    case XmlNodeType.Text:
                        node.Text = reader.Value;
                        break;

                    // Read() never positions the reader on an attribute, so no
                    // XmlNodeType.Attribute case is needed here; attributes are
                    // collected by GetAttributes.
                }
            }

            return false;
        }

        /// <summary>
        /// Copies the attributes of the element the reader is positioned on into
        /// <see cref="Node.Attributes"/>. The dictionary is only created when the
        /// element has at least one attribute.
        /// </summary>
        /// <param name="reader">Reader positioned on an element.</param>
        /// <param name="node">Node that receives the attributes.</param>
        static void GetAttributes(XmlReader reader, Node node)
        {
            if (reader.AttributeCount == 0)
            {
                return;
            }

            node.Attributes = new Dictionary<string, string>();
            while (reader.MoveToNextAttribute())
            {
                node.Attributes.Add(reader.Name, reader.Value);
            }

            // Return to the owning element so the caller is not left positioned
            // on the last attribute.
            reader.MoveToElement();
        }
    }

    /// <summary>
    /// One element of the parsed document.
    /// </summary>
    public class Node
    {
        // Element name; empty until the node has been assigned an element.
        public string Name = string.Empty;

        // Value of the last text node read directly under this element.
        public string Text = string.Empty;

        // Attribute name/value pairs; null when the element has no attributes.
        public Dictionary<string, string> Attributes = null;

        // Child elements in document order; null when there are none.
        public List<Node> Children = null;
    }
}

Comments

Popular posts from this blog

python - Healpy: From Data to Healpix map -

c - Bitwise operation with (signed) enum value -

xslt - Unnest parent nodes by child node -