如何在 .NET 中获取 XML 元素的流位置（元素、位置、.NET、XML）

2023-09-03 22:59:18 作者:唯恐天下不亂i

在 .NET 4 中，如何以比较高效的方式获取 XElement 的流位置？

          1         2         3         4         5         6         7         8
01234567890123456789012345678901234567890123456789012345678901234567890123456789012
<root><group id="0" combiner="or"><filter id="1" /><filter id="2" /></group></root>
 

我想根据上面的内容创建一个从路径到段（Segment）的映射：

{ { "/root",                  Segment(0 , 82) },
  { "/root/group-0",          Segment(6 , 75) },
  { "/root/group-0/filter-1", Segment(34, 50) },
  { "/root/group-0/filter-2", Segment(51, 67) } }
 

备注

- 段的第二个字段可以是长度，而不是结束索引。
- 这些方法可以更通用，也可以扩展到其他字节表示形式。

关于我的答案的博客文章（附内存分析截图）

ASP.NET里XML读取问题如图

http://corsis.posterous.com/xml-keyvalue-cache-optimizations

奖金

使用某种压缩形式，使元素可以在 O(1) 时间内访问，同时内存中只需保留整个文档的一份副本，不重复存储任何子元素。

红利示例

store["/root"].Decompress()         **O(1)**
store["/root/group-0"].Decompress() **O(1)**
 

解决方案

这是我最初的尝试:

 使用系统;
使用System.Collections.Generic;
使用System.Collections.Concurrent;
使用System.Linq的;
使用System.IO;
使用的System.Xml;
使用System.Xml.Linq的;
使用System.Text;

命名空间的XMLTest
{
    公共结构段
    {
        市民段(长指数,长)
        {
            指数=指数;
            长度=长度
        }

        众长指数;
        众长长度;

        公共重写字符串的ToString()
        {
            返回的String.Format(段({0},{1}),指数,长度);
        }
    }

    公共静态类GeneralSerializationExtensions
    {
        公共静态字符串段(字符串缓冲区,段段)
        {
            返回buffer.Substring((INT)segment.Index,(INT)segment.Length);
        }

        公共静态的byte []字节(该流流,诠释了startIndex = 0,布尔挫折= FALSE)
        {
            VAR字节=新字节[stream.Length]
            如果(stream.CanSeek&安培;&安培; stream.CanRead)
            {
                VAR位置= stream.Position;
                stream.Seek(在startIndex,SeekOrigin.Begin);
                stream.Read(字节,0,(int)的stream.Length);
                如果(挫折)
                    stream.Position =位置;
            }
            返回字节;
        }
    }

    类节目
    {
        静态无效的主要(字串[] args)
        {
            VAR流=新的MemoryStream();
            变种元件= XElement.Parse(@&其中;根>&所述;组id =0组合=或>&其中;过滤器的id =1/>&其中;过滤器的id = 2/>&所述; /组>&所述; /根>中);
            // VAR元= XElement.Parse(< A> I< b n ='1'O ='2'P =''/>< b ID ='2'> c为C /> < / B>< b n =3/>< b n =4O ='U'> 2< / B>< / A>中);

            VAR馅饼=新PathIndexedXElement(元);

            的foreach(在pie.Paths.OrderBy变种路径(P =指p))
            {
                VAR S = pie.store [路径]
                VAR T =馅饼[路径]
                Console.WriteLine(> {2,-30} {0,-20} {1},S,T,路径);
            }
        }
    }

    公共类PathIndexedXElement
    {
        内部字符串缓冲区;
        内部ConcurrentDictionary<字符串,段>商店;

        公共PathIndexedXElement(的XElement元)
        {
            缓冲= XmlPathSegmenter.StringBuffer(元件);
            店内= element.PathSegments();
        }

        公开的IEnumerable<字符串>路径
        {
            {返回store.Keys; }
        }

        公共字符串此[字符串路径]
        {
            {返回buffer.Segment(店[路径]); }
        }

        公共BOOL TryGetValue(字符串路径,出字符串的XElement)
        {
            段的段;
            如果(store.TryGetValue(道路,出于段))
            {
                的XElement = buffer.Segment(段);
                返回true;
            }
            的XElement = NULL;
            返回false;
        }
    }

    公共静态类XmlPathSegmenter
    {
        公共静态的XmlWriter CreateWriter(流流)
        {
            VAR设置=新XmlWriterSettings(){编码= Encoding.UTF8,缩进=假,OmitXmlDeclaration = TRUE,NewLineHandling = NewLineHandling.None};

            返回XmlWriter.Create(流设置);
        }

        公共静态的MemoryStream MemoryBuffer(的XElement元)
        {
            VAR流=新的MemoryStream();
            VAR作家= CreateWriter(流);
            element.Save(作家);
            writer.Flush();
            stream.Position = 0;
            返回流;
        }

        公共静态字符串的StringBuffer(的XElement元)
        {
            返回Encoding.UTF8.GetString(MemoryBuffer(元).Bytes())子串(1)。
        }

        公共静态ConcurrentDictionary<字符串,段> PathSegments(字符串XMLELEMENT,ConcurrentDictionary<字符串,段>商店= NULL)
        {
            返回PathSegments(XElement.Parse(XMLELEMENT),存储);
        }

        公共静态ConcurrentDictionary<字符串,段> PathSegments(这的XElement元素,ConcurrentDictionary<字符串,段>商店= NULL)
        {
            VAR流=新的MemoryStream();
            VAR作家= CreateWriter(流);
            element.Save(作家);
            writer.Flush();
            stream.Position = 0;

            返回PathSegments(流店);
        }

        公共静态ConcurrentDictionary<字符串,段> PathSegments(流流,ConcurrentDictionary<字符串,段>商店= NULL)
        {
            如果(店面== NULL)
                店内=新ConcurrentDictionary<字符串,段>();

            VAR堆栈=新ConcurrentStack< KeyValuePair<字符串,INT>>();
            PathSegments(流叠加,商店);

            回到店里;
        }

        //
        静态无效PathSegments(流流,ConcurrentStack< KeyValuePair<字符串,INT>>堆栈,ConcurrentDictionary<字符串,段>存储)
        {
            变种读者= XmlReader.Create(流,新XmlReaderSettings(){});
            VAR行=阅读器IXmlLineInfo;

            而(reader.Read())
            {
                KeyValuePair<字符串,INT> EP;
            好:
                如果(reader.IsStartElement())
                {
                    stack.TryPeek(出EP);
                    stack.Push(新KeyValuePair<字符串,INT>(ep.Key +路径(读卡器),line.LinePosition  -  2));
                }

                如果(reader.IsEmptyElement)
                {
                    VAR名称= reader.LocalName;
                    变种D = reader.Depth;
                    reader.Read();
                    如果(stack.TryPop(出EP))
                    {
                        变种长度= line.LinePosition  -  2  -  ep.Value  - (d取代; reader.Depth?1:0);
                        Console.WriteLine(/ {3} | {0}:{1}  - > {2},名称,ep.Value,长度,line.LineNumber);

                        store.TryAdd(ep.Key,新段(ep.Value,长度));
                    }
                    转到确定;
                }

                如果(reader.NodeType == XmlNodeType.EndElement)
                {
                    如果(stack.TryPop(出EP))
                    {
                        VAR长度= line.LinePosition + reader.LocalName.Length  -  ep.Value;
                        Console.WriteLine(| {3} | {0}:{1}  - > {2},reader.LocalName,ep.Value,长度,line.LineNumber);

                        store.TryAdd(ep.Key,新段(ep.Value,长度));
                    }
                }

            }
        }
        //

        公共静态字符串路径(XmlReader的元素)
        {
            如果(!(element.IsStartElement()|| element.IsEmptyElement))
                返回null;

            如果(!element.HasAttributes)
                返回/+ element.LocalName;
            VAR ID = element.GetAttribute(ID);
            返回的String.Format(ID == NULL/ {0}:?/ {0}  -  {1},element.LocalName,身份证);
        }
    }
}
 

输出:

/1|filter : 34 -> 17
/1|filter : 51 -> 17
|1|group : 6 -> 70
|1|root : 0 -> 83
> /root                          Segment(0, 83)       <root><group id="0" combiner="or"><filter id="1" /><filter id="2" /></group></root>
> /root/group-0                  Segment(6, 70)       <group id="0" combiner="or"><filter id="1" /><filter id="2" /></group>
> /root/group-0/filter-1         Segment(34, 17)      <filter id="1" />
> /root/group-0/filter-2         Segment(51, 17)      <filter id="2" />
 

关键在于发现 XmlReader 类显式实现了 IXmlLineInfo 接口——这条信息很难找到。

备注

鉴于这个问题收到的各种评论，我先做一点预防性的说明 :)

- 集合的并发版本在这个例子中不起任何作用。我知道这一点，并且乐于使用它们 :)
- 路径方案很容易推广，但目前的方案已经满足我的全部需求。
- 我知道 *id* 通常用作文档范围内的唯一标识符，但在这个特定场景下我对自己的用法很满意。
- 段可以很容易地扩展出另一个长度属性，指向开始标记的结束符号 >，从而只提取文档树中任意元素的属性，用于在任何其他目标元素周围重建上下文。对于较浅的树，这应该能在访问目标元素及其上下文信息时带来不错的常数因子。
- 我完全清楚这一切未必值得尝试：我的场景目前还没有任何数据。我只是想开发一种方法并与大家分享。

How can I get the stream position of an XElement in .NET 4 in a reasonably efficient way?

          1         2         3         4         5         6         7         8
01234567890123456789012345678901234567890123456789012345678901234567890123456789012
<root><group id="0" combiner="or"><filter id="1" /><filter id="2" /></group></root>

I want to create from the above a map to segments

{ { "/root",                  Segment(0 , 82) },
  { "/root/group-0",          Segment(6 , 75) },
  { "/root/group-0/filter-1", Segment(34, 50) },
  { "/root/group-0/filter-2", Segment(51, 67) } }

Notes

- The second field of each segment can be a length instead of an end index.
- The methods can be more general, or extended to other byte representations.

Blog Post About My Answer with Memory Profiling Screenshots

http://corsis.posterous.com/xml-keyvalue-cache-optimizations

Bonus

use a form of compression that allows O(1) access to elements but requires only a single copy of the whole document without any subelements duplicated in the memory.

Bonus Example

store["/root"].Decompress()         **O(1)**
store["/root/group-0"].Decompress() **O(1)**

解决方案

Here's my initial attempt:

using System;
using System.Collections.Generic;
using System.Collections.Concurrent;
using System.Linq;
using System.IO;
using System.Xml;
using System.Xml.Linq;
using System.Text;

namespace XMLTest
{
    /// <summary>
    /// A span inside a buffer, described by a start offset and a length.
    /// Elsewhere in this file it is consumed via <c>string.Substring(Index, Length)</c>.
    /// </summary>
    public struct Segment
    {
        /// <summary>Start offset of the span.</summary>
        public long Index;

        /// <summary>Number of units covered by the span.</summary>
        public long Length;

        /// <summary>Creates a segment from its start offset and its length.</summary>
        /// <param name="index">Start offset of the span.</param>
        /// <param name="length">Length of the span.</param>
        public Segment(long index, long length)
        {
            this.Index = index;
            this.Length = length;
        }

        /// <summary>Renders the segment as <c>Segment(index, length)</c>.</summary>
        public override string ToString()
        {
            return "Segment(" + this.Index + ", " + this.Length + ")";
        }
    }

    /// <summary>
    /// Small extension helpers for cutting a <see cref="Segment"/> out of a string
    /// and for reading the contents of a seekable stream into a byte array.
    /// </summary>
    public static class GeneralSerializationExtensions
    {
        /// <summary>
        /// Returns the part of <paramref name="buffer"/> described by <paramref name="segment"/>.
        /// </summary>
        /// <param name="buffer">The text to slice.</param>
        /// <param name="segment">Start offset and length; both are narrowed to <c>int</c>.</param>
        /// <returns>The substring starting at <c>segment.Index</c> with <c>segment.Length</c> characters.</returns>
        public static string Segment(this string buffer, Segment segment)
        {
            return buffer.Substring((int)segment.Index, (int)segment.Length);
        }

        /// <summary>
        /// Reads the stream from <paramref name="startIndex"/> to its end into a new array.
        /// </summary>
        /// <param name="stream">The stream to read; must not be null.</param>
        /// <param name="startIndex">Absolute position to start reading from.</param>
        /// <param name="setBack">When true, the stream position is restored after reading.</param>
        /// <returns>
        /// An array whose length equals <c>stream.Length</c>. Bytes read from
        /// <paramref name="startIndex"/> onwards are placed at the start of the array; when
        /// <paramref name="startIndex"/> is greater than zero the remaining tail stays zero-filled
        /// (array length is kept as-is for backward compatibility). If the stream is seekable but
        /// not readable, the array is returned zero-filled.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public static byte[] Bytes(this Stream stream, int startIndex = 0, bool setBack = false)
        {
            if (stream == null)
                throw new ArgumentNullException("stream");

            var bytes = new byte[stream.Length];
            if (stream.CanSeek && stream.CanRead)
            {
                var position = stream.Position;
                stream.Seek(startIndex, SeekOrigin.Begin);

                // Stream.Read may return fewer bytes than requested even before the end of the
                // stream, so keep reading until the buffer is full or the stream reports EOF (0).
                var filled = 0;
                while (filled < bytes.Length)
                {
                    var read = stream.Read(bytes, filled, bytes.Length - filled);
                    if (read == 0)
                        break;
                    filled += read;
                }

                if (setBack)
                    stream.Position = position;
            }
            return bytes;
        }
    }

    /// <summary>
    /// Console demo: indexes a small XML document by element path and prints, for every path,
    /// the stored segment and the text that segment selects from the serialized buffer.
    /// </summary>
    class Program
    {
        /// <summary>Entry point of the demo; <paramref name="args"/> is not used.</summary>
        static void Main(string[] args)
        {
            // The unused MemoryStream local that used to be created here was removed:
            // it was never read, written or disposed.
            var element = XElement.Parse(@"<root><group id=""0"" combiner=""or""><filter id=""1"" /><filter id=""2"" /></group></root>");
            // Alternative input (text nodes, nested and attribute-only elements) for manual experiments:
            //var element = XElement.Parse("<a>i<b id='1' o='2' p=''/><b id='2'><c /></b><b id='3' /><b id='4' o='u'>2</b></a>");

            var pie = new PathIndexedXElement(element);

            foreach (var path in pie.Paths.OrderBy(p => p))
            {
                var s = pie.store[path]; // the raw segment (offset, length)
                var t = pie[path];       // the text that segment selects
                Console.WriteLine("> {2,-30} {0,-20} {1}", s, t, path);
            }
        }
    }

    /// <summary>
    /// Pairs the serialized text of an element with a path-to-segment index, so the text of any
    /// indexed element can be obtained as a substring of one shared buffer.
    /// </summary>
    public class PathIndexedXElement
    {
        // Serialized text produced by XmlPathSegmenter.StringBuffer.
        internal string buffer;

        // Path -> segment index produced by the PathSegments extension method.
        internal ConcurrentDictionary<string, Segment> store;

        /// <summary>Serializes <paramref name="element"/> and builds the path index for it.</summary>
        public PathIndexedXElement(XElement element)
        {
            this.buffer = XmlPathSegmenter.StringBuffer(element);
            this.store = element.PathSegments();
        }

        /// <summary>All indexed paths, in the dictionary's own enumeration order.</summary>
        public IEnumerable<string> Paths
        {
            get
            {
                return this.store.Keys;
            }
        }

        /// <summary>
        /// Text of the element stored under <paramref name="path"/>; an unknown path surfaces
        /// whatever the dictionary indexer throws.
        /// </summary>
        public string this[string path]
        {
            get
            {
                Segment span = this.store[path];
                return this.buffer.Segment(span);
            }
        }

        /// <summary>Non-throwing lookup of the element text stored under <paramref name="path"/>.</summary>
        /// <param name="path">Path key to look up.</param>
        /// <param name="xelement">The element text when found; otherwise null.</param>
        /// <returns>True when the path is indexed.</returns>
        public bool TryGetValue(string path, out string xelement)
        {
            Segment span;
            bool found = this.store.TryGetValue(path, out span);
            xelement = found ? this.buffer.Segment(span) : null;
            return found;
        }
    }

    /// <summary>
    /// Serializes an <see cref="XElement"/> to unindented UTF-8 text and records, for every element
    /// in that text, a <see cref="Segment"/> (start offset, length) keyed by a path such as
    /// "/root/group-0/filter-1".
    /// </summary>
    /// <remarks>
    /// Paths are not guaranteed to be unique: when two elements produce the same path only the
    /// first one that is completed is kept, because entries are added with <c>TryAdd</c>.
    /// </remarks>
    public static class XmlPathSegmenter
    {
        /// <summary>
        /// Creates an XML writer over <paramref name="stream"/> that emits UTF-8 without
        /// indentation, without an XML declaration and without newline normalization.
        /// </summary>
        public static XmlWriter CreateWriter(Stream stream)
        {
            var settings = new XmlWriterSettings() { Encoding = Encoding.UTF8, Indent = false, OmitXmlDeclaration = true, NewLineHandling = NewLineHandling.None };

            return XmlWriter.Create(stream, settings);
        }

        /// <summary>
        /// Serializes <paramref name="element"/> into a new in-memory stream positioned at 0.
        /// The caller owns the returned stream.
        /// </summary>
        public static MemoryStream MemoryBuffer(XElement element)
        {
            var stream = new MemoryStream();
            // Disposing the writer flushes it. CreateWriter leaves XmlWriterSettings.CloseOutput
            // at its default (false), so the underlying stream stays open.
            using (var writer = CreateWriter(stream))
            {
                element.Save(writer);
            }
            stream.Position = 0;
            return stream;
        }

        /// <summary>
        /// Serializes <paramref name="element"/> to a string whose character offsets match the
        /// segments produced by <c>PathSegments</c>.
        /// </summary>
        public static string StringBuffer(XElement element)
        {
            using (var stream = MemoryBuffer(element))
            {
                var text = Encoding.UTF8.GetString(stream.ToArray());
                // The writer is configured with Encoding.UTF8, and the decoded text starts with
                // the byte order mark character. Drop it only when it is really there instead
                // of unconditionally cutting the first character.
                return text.Length > 0 && text[0] == '\uFEFF' ? text.Substring(1) : text;
            }
        }

        /// <summary>Parses <paramref name="xmlElement"/> and indexes the resulting element.</summary>
        /// <param name="xmlElement">XML text of a single element.</param>
        /// <param name="store">Optional dictionary to fill; a new one is created when null.</param>
        /// <returns>The dictionary that received the path/segment pairs.</returns>
        public static ConcurrentDictionary<string, Segment> PathSegments(string xmlElement, ConcurrentDictionary<string, Segment> store = null)
        {
            return PathSegments(XElement.Parse(xmlElement), store);
        }

        /// <summary>Serializes <paramref name="element"/> and indexes the serialized form.</summary>
        /// <param name="element">The element to index.</param>
        /// <param name="store">Optional dictionary to fill; a new one is created when null.</param>
        /// <returns>The dictionary that received the path/segment pairs.</returns>
        public static ConcurrentDictionary<string, Segment> PathSegments(this XElement element, ConcurrentDictionary<string, Segment> store = null)
        {
            // Reuse MemoryBuffer so the index is built from the same serialization as StringBuffer.
            using (var stream = MemoryBuffer(element))
            {
                return PathSegments(stream, store);
            }
        }

        /// <summary>Indexes the XML read from <paramref name="stream"/>.</summary>
        /// <param name="stream">Stream containing the XML; it is left open.</param>
        /// <param name="store">Optional dictionary to fill; a new one is created when null.</param>
        /// <returns>The dictionary that received the path/segment pairs.</returns>
        public static ConcurrentDictionary<string, Segment> PathSegments(Stream stream, ConcurrentDictionary<string, Segment> store = null)
        {
            if (store == null)
                store = new ConcurrentDictionary<string, Segment>();

            var stack = new ConcurrentStack<KeyValuePair<string, int>>();
            PathSegments(stream, stack, store);

            return store;
        }

        // Walks the XML with a reader and derives every element's (offset, length) from the
        // reader's line-position information. Each stack entry holds the path of an open
        // element and the offset of its '<'. A diagnostic line is written to the console for
        // every completed element.
        //
        // NOTE(review): the arithmetic below matches the sample output for the unindented,
        // single-line serialization produced by CreateWriter. It appears to assume that every
        // node is on one line, that an empty element is directly followed by another tag (not
        // text), and that element names carry no prefix (LocalName is used for the closing-tag
        // length) - confirm before feeding it other input.
        static void PathSegments(Stream stream, ConcurrentStack<KeyValuePair<string, int>> stack, ConcurrentDictionary<string, Segment> store)
        {
            // XmlReaderSettings.CloseInput defaults to false, so disposing the reader leaves
            // the caller's stream open.
            using (var reader = XmlReader.Create(stream, new XmlReaderSettings() { }))
            {
                var line = reader as IXmlLineInfo;
                if (line == null)
                    throw new InvalidOperationException("The XML reader does not expose line information (IXmlLineInfo).");

                while (reader.Read())
                {
                    KeyValuePair<string, int> ep;
                ok:
                    if (reader.IsStartElement())
                    {
                        // An empty stack leaves ep at its default (null key), so the root path
                        // starts from nothing. LinePosition - 2 is the offset of the tag's '<'
                        // (34 for the first filter element in the sample).
                        stack.TryPeek(out ep);
                        stack.Push(new KeyValuePair<string, int>(ep.Key + Path(reader), line.LinePosition - 2));
                    }

                    if (reader.IsEmptyElement)
                    {
                        var name = reader.LocalName;
                        var d = reader.Depth;
                        // An empty element has no end-tag node, so its end is inferred from the
                        // position of the node that follows it.
                        reader.Read();
                        if (stack.TryPop(out ep))
                        {
                            // When the following node is shallower (a parent's closing tag), one
                            // more character is subtracted - presumably for the '/' of "</".
                            var length = line.LinePosition - 2 - ep.Value - (d > reader.Depth ? 1 : 0);
                            Console.WriteLine("/{3}|{0} : {1} -> {2}", name, ep.Value, length, line.LineNumber);

                            store.TryAdd(ep.Key, new Segment(ep.Value, length));
                        }
                        // The node just read has not been examined yet; process it without
                        // calling Read() again.
                        goto ok;
                    }

                    if (reader.NodeType == XmlNodeType.EndElement)
                    {
                        if (stack.TryPop(out ep))
                        {
                            // LinePosition plus the name length is taken as the offset just
                            // past the closing tag's '>'.
                            var length = line.LinePosition + reader.LocalName.Length - ep.Value;
                            Console.WriteLine("|{3}|{0} : {1} -> {2}", reader.LocalName, ep.Value, length, line.LineNumber);

                            store.TryAdd(ep.Key, new Segment(ep.Value, length));
                        }
                    }

                }
            }
        }

        /// <summary>
        /// Builds the path step for the element the reader is positioned on:
        /// "/name" when it has no <c>id</c> attribute, "/name-id" otherwise.
        /// </summary>
        /// <param name="element">Reader to inspect.</param>
        /// <returns>The path step, or null when the reader is not on a start or empty element.</returns>
        public static string Path(XmlReader element)
        {
            if (!(element.IsStartElement() || element.IsEmptyElement))
                return null;

            // GetAttribute returns null when the attribute is absent, which also covers
            // elements without any attributes.
            var id = element.GetAttribute("id");
            return id == null
                ? "/" + element.LocalName
                : "/" + element.LocalName + "-" + id;
        }
    }
}

Output:

/1|filter : 34 -> 17
/1|filter : 51 -> 17
|1|group : 6 -> 70
|1|root : 0 -> 83
> /root                          Segment(0, 83)       <root><group id="0" combiner="or"><filter id="1" /><filter id="2" /></group></root>
> /root/group-0                  Segment(6, 70)       <group id="0" combiner="or"><filter id="1" /><filter id="2" /></group>
> /root/group-0/filter-1         Segment(34, 17)      <filter id="1" />
> /root/group-0/filter-2         Segment(51, 17)      <filter id="2" />

The enabler was discovering that the IXmlLineInfo interface is explicitly implemented by the XmlReader class, which was a hard piece of information to find.

Notes

Now being a bit preemptively defensive :) after all the comments I received about this question:

- Concurrent versions of the collections play no role in this example. I am aware of that and happy with using them :)
- The pathing scheme could easily be generalized, but this covers all my needs.
- I am aware that *id*s are normally used as document-wide unique identifiers; I am happy with my use of them in this particular context.
- Segments could easily be extended to have another length property pointing at the closing > sign of start tags, enabling extraction of only the attributes of any given element in the document tree for context reconstruction around any other target element. With shallow trees, this should give a nice constant factor for accessing target elements together with their context information.
- I am fully aware that all of this may or may not be a worthwhile attempt: I do not have any numbers for my scenario yet. I just wanted to develop a method and share it with people.