从 RTF 文件中提取嵌入的图像对象（关键词：嵌入式、图像、对象、格式）

2023-09-04 00:28:25 作者:雨蚀

我有一些 RTF 文档，其中包含一个嵌入对象（图像）。我需要把它提取为 Image 对象（或任何其他可用的格式）。我查看过这篇 CodeProject 文章，但默认的应用程序无法正确呈现它（它们呈现的是“默认图像”，而不是图像本身），所以我放弃了这个方案。

下面是 RTF 代码的示例（由于篇幅原因，我不得不将其缩短）：

{\rtf1\ansi\deff0{\fonttbl{\f0\fnil\fcharset0 MS Sans Serif;}}
\viewkind4\uc1\pard\lang1033\f0\fs18{\object\objemb{\*\objclass Package}\objw855\objh810{\*\objdata 
01050000
02000000
08000000
5061636b61676500
00000000
00000000
1f900000
02007369675f5f2e6a706700433a5c55736572735c726563657074696f6e5c4465736b746f705c
5369676e6174757265735c7369675f5f2e6a7067000000030034000000433a5c55736572735c52
45434550547e315c417070446174615c4c6f63616c5c54656d705c7369675f5f20283132292e6a
706700c18e0000ffd8ffe000104a46494600010101004800470000ffdb00430001010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101010101010101010101ffdb00430101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101ffc0001108012c03e803012200021101031101ffc4001f00010002030002
0301000000000000000000090a07080b050602030401ffc4003f10000006030001040201030301
04070900000203040506010708090a11121314152116172223314118192532591a24576598d6d8
2933384651788497b7ffc4001a010101000301010000000000000000000000030204050106ffc4
002b11010003010100020103030402030000000002030401051112130614211522230731415124
32536162ffda000c03010002110311003f00bfc000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
...
005c0072006500630065007000740069006f006e005c004400650073006b0074006f0070005c00
5300690067006e006100740075007200650073005c007300690067005f005f002e006a00700067
00
01050000
00000000
}{\result{\pict\wmetafile8\picw2010\pich1905\picwgoal855\pichgoal810 
0100090000033b0700000200210600000000050000000b0200000000050000000c02350038001c
000000fb02f4ff000000000000900100000001000000005365676f65205549000e0a52104c2308
00dd1900d894ef758001f3758d0e664a040000002d010000050000000902000000000500000001
02ffffff00a5000000410bc600880020002000000000002000200000000c002800000020000000
400000000100010000000000000100000000000000000000000000000000000000000000ffffff
...
0021001c001c000000fb021000070000000000bc02000000000102022253797374656d00008d0e
664a00000a0022008a0100000000ffffffff8cdd1900040000002d010100030000000000
}}}\par
}
 

解决方案 RTF格式怎么转换成DOC格式

下面是一段代码，可以从 RTF 流中提取所有对象（“Package” 类的对象）：

 公共静态无效ExtractPackageObjects(字符串文件路径)
    {
        使用(StreamReader的SR =新的StreamReader(文件路径))
        {
            RtfReader读卡器=新RtfReader(SR);
            IEnumerator的< RtfObject>枚举= reader.Read()的GetEnumerator()。
            而(enumerator.MoveNext())
            {
                如果(enumerator.Current.Text ==对象)
                {
                    如果(RtfReader.MoveToNextControlWord(枚举,objclass))
                    {
                        字符串的className = RtfReader.GetNextText(枚举器);
                        如果(类名==包)
                        {
                            如果(RtfReader.MoveToNextControlWord(枚举,objdata))
                            {
                                byte []的数据= RtfReader.GetNextTextAsByteArray(枚举器);
                                使用(MemoryStream的packageData =新的MemoryStream())
                                {
                                    RtfReader.ExtractObjectData(新的MemoryStream(数据),packageData);
                                    packageData.Position = 0;
                                    PackagedObject PO = PackagedObject.Extract(packageData);
                                    File.WriteAllBytes(po.DisplayName,po.Data);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
 

下面是这段代码用到的工具类。其中有一个简单的基于流的 RTF 解析器，可以用来定位到需要的控制字。

还有一个工具类，用于从序列化的对象包装程序（Object Packager）实例中提取数据。对象包装程序是将近 20 年前 OLE 1.0 时代的产物，其序列化二进制格式没有文档说明（据我所知），但还是可以看懂的。

这段代码在您提供的示例上运行正常，但您可能需要根据实际情况做一些调整。

 公共类RtfReader
{
    公共RtfReader(TextReader的读者)
    {
        如果(读者== NULL)
            抛出新ArgumentNullException(读者);

        读卡器=阅读器;
    }

    公众的TextReader阅读器{获得;私定; }

    公开的IEnumerable< RtfObject>读()
    {
        StringBuilder的控制字=新的StringBuilder();
        StringBuilder的文本=新的StringBuilder();
        堆叠< RtfParseState>堆栈=新的堆栈< RtfParseState>();
        RtfParseState状态= RtfParseState.Group;

        做
        {
            INT I = Reader.Read();
            如果(ⅰ℃,)
            {
                如果(!string.IsNullOrWhiteSpace(controlWord.ToString()))
                    产量返回新RtfControlWord(controlWord.ToString());

                如果(!string.IsNullOrWhiteSpace(text.ToString()))
                    产量返回新RtfText(text.ToString());

                产生中断;
            }

            炭C =(焦炭)我;

            //噪声字符
            如果((三=='\ r')||
                (三=='\ N'))
                继续;

            开关(州)
            {
                案例RtfParseState.Group:
                    如果(C =='{')
                    {
                        stack.Push(州);
                        打破;
                    }

                    如果(C =='\\')
                    {
                        状态= RtfParseState.ControlWord;
                        打破;
                    }
                    打破;

                案例RtfParseState.ControlWord:
                    如果(C =='\\')
                    {
                        //另一个控制字
                        如果(!string.IsNullOrWhiteSpace(controlWord.ToString()))
                        {
                            产量返回新RtfControlWord(controlWord.ToString());
                            controlWord.Clear();
                        }
                        打破;
                    }

                    如果(C =='{')
                    {
                        //一个新的组
                        状态= RtfParseState.Group;
                        如果(!string.IsNullOrWhiteSpace(controlWord.ToString()))
                        {
                            产量返回新RtfControlWord(controlWord.ToString());
                            controlWord.Clear();
                        }
                        打破;
                    }

                    如果(C =='})
                    {
                        //关闭组
                        状态= stack.Count> 0? stack.Pop():RtfParseState.Group;
                        如果(!string.IsNullOrWhiteSpace(controlWord.ToString()))
                        {
                            产量返回新RtfControlWord(controlWord.ToString());
                            controlWord.Clear();
                        }
                        打破;
                    }

                    如果(!Char.IsLetterOrDigit(c))的
                    {
                        状态= RtfParseState.Text;
                        text.Append(C);
                        如果(!string.IsNullOrWhiteSpace(controlWord.ToString()))
                        {
                            产量返回新RtfControlWord(controlWord.ToString());
                            controlWord.Clear();
                        }
                        打破;
                    }

                    controlWord.Append(C);
                    打破;

                案例RtfParseState.Text:
                    如果(C =='\\')
                    {
                        状态= RtfParseState.EscapedText;
                        打破;
                    }

                    如果(C =='{')
                    {
                        如果(!string.IsNullOrWhiteSpace(text.ToString()))
                        {
                            产量返回新RtfText(text.ToString());
                            text.Clear();
                        }

                        //一个新的组
                        状态= RtfParseState.Group;
                        打破;
                    }

                    如果(C =='})
                    {
                        如果(!string.IsNullOrWhiteSpace(text.ToString()))
                        {
                            产量返回新RtfText(text.ToString());
                            text.Clear();
                        }

                        //关闭组
                        状态= stack.Count> 0? stack.Pop():RtfParseState.Group;
                        打破;
                    }
                    text.Append(C);
                    打破;

                案例RtfParseState.EscapedText:
                    如果((三=='\\')||(C =='}')||(C =='{'))
                    {
                        状态= RtfParseState.Text;
                        text.Append(C);
                        打破;
                    }

                    // ANSI字符转义
                    如果(C =='\'')
                    {
                        text.Append(FromHexa((炭)Reader.Read(),(炭)Reader.Read()));
                        打破;
                    }

                    如果(!string.IsNullOrWhiteSpace(text.ToString()))
                    {
                        产量返回新RtfText(text.ToString());
                        text.Clear();
                    }

                    //实际上,这是一个正常的控制字
                    controlWord.Append(C);
                    状态= RtfParseState.ControlWord;
                    打破;
            }
        }
        而(真);
    }

    公共静态布尔MoveToNextControlWord(IEnumerator的< RtfObject>枚举器,串字)
    {
        如果(枚举== NULL)
            抛出新ArgumentNullException(调查员);

        而(enumerator.MoveNext())
        {
            如果(enumerator.Current.Text ==字)
                返回true;
        }
        返回false;
    }

    公共静态字符串GetNextText(IEnumerator的< RtfObject>枚举)
    {
        如果(枚举== NULL)
            抛出新ArgumentNullException(调查员);

        而(enumerator.MoveNext())
        {
            RtfText文本= enumerator.Current为RtfText;
            如果(文字!= NULL)
                返回text.Text;
        }
        返回null;
    }

    公共静态的byte [] GetNextTextAsByteArray(IEnumerator的< RtfObject>枚举)
    {
        如果(枚举== NULL)
            抛出新ArgumentNullException(调查员);

        而(enumerator.MoveNext())
        {
            RtfText文本= enumerator.Current为RtfText;
            如果(文字!= NULL)
            {
                名单<字节>字节=新的名单,其中,字节>();
                的for(int i = 0; I< text.Text.Length;我+ = 2)
                {
                    bytes.Add((字节)FromHexa(text.Text [I],text.Text [I + 1]));
                }
                返回bytes.ToArray();
            }
        }
        返回null;
    }

    //提取的EmbeddedObject / ObjectHeader从流
    //参见[MS -OLEDS]:对象链接和嵌入(OLE)数据结构的详细信息
    //第2.2章:OLE1.0格式结构
    公共静态无效ExtractObjectData(流的InputStream,流的OutputStream)
    {
        如果(的InputStream == NULL)
            抛出新ArgumentNullException(InputStream的);

        如果(OutputStream的== NULL)
            抛出新ArgumentNullException(的OutputStream);

        读者BinaryReader在新= BinaryReader在(InputStream的);
        reader.ReadInt32(); // OLEVersion
        INT formatId = reader.ReadInt32(); // FormatID
        如果(formatId!= 2)//见2.2.4对象标题。 2指EmbeddedObject
            抛出新NotSupportedException异常();

        读数长度prefixedAnsiString(读卡器); //类名
        读数长度prefixedAnsiString(读卡器); // topicName
        读数长度prefixedAnsiString(读卡器); // 项目名

        INT nativeDataSize = reader.ReadInt32();
        字节[]字节= reader.ReadBytes(nativeDataSize);
        outputStream.Write(字节,0,bytes.Length);
    }

    //见2.1.4长度prefixedAnsiString
    私人静态字符串读数长度prefixedAnsiString(读者BinaryReader在)
    {
        INT长度= reader.ReadInt32();
        如果(长度== 0)
            返回的String.Empty;

        字节[]字节= reader.ReadBytes(长度);
        返回Encoding.Default.GetString(字节,0,长度为 -  1);
    }

    私人枚举RtfParseState
    {
        控制字,
        文本,
        EscapedText,
        组
    }

    私有静态字符FromHexa(字符喜,焦LO)
    {
        返回(炭)byte.Parse(hi.ToString()+ LO,NumberStyles.HexNumber);
    }
}

//工具类来分析一个OLE1.0 OLEOBJECT
公共类PackagedObject
{
    私人PackagedObject()
    {
    }

    公共字符串显示名称{获得;私定; }
    公共字符串IconFilePath {获得;私定; }
    公众诠释IconIndex {获得;私定; }
    公共字符串的文件路径{获得;私定; }
    公共byte []的数据{获得;私定; }

    私人静态字符串ReadAnsiString(读者BinaryReader在)
    {
        StringBuilder的SB =新的StringBuilder();
        做
        {
            字节B = reader.ReadByte();
            如果(B == 0)
                返回sb.ToString();

            sb.Append((char)的B);
        }
        而(真);
    }

    公共静态PackagedObject提取物(流的InputStream)
    {
        如果(的InputStream == NULL)
            抛出新ArgumentNullException(InputStream的);

        读者BinaryReader在新= BinaryReader在(InputStream的);
        reader.ReadUInt16(); // SIG
        PackagedObject PO =新PackagedObject();
        po.DisplayName = ReadAnsiString(读卡器);
        po.IconFilePath = ReadAnsiString(读卡器);
        po.IconIndex = reader.ReadUInt16();
        整型= reader.ReadUInt16();
        如果(类型!= 3)// 3档,1个链接
            抛出新NotSupportedException异常();

        reader.ReadInt32(); // nextsize
        po.FilePath = ReadAnsiString(读卡器);
        INT数据大小= reader.ReadInt32();
        po.Data = reader.ReadBytes(数据大小);
        //注意在这之后,也有可能是单向code +长的路径信息
        返回PO;
    }
}

公共类RtfObject
{
    公共RtfObject(文本字符串)
    {
        如果(文字== NULL)
            抛出新ArgumentNullException(文字);

        文本= text.Trim();
    }

    公共字符串文本{获得;私定; }
}

公共类RtfText:RtfObject
{
    公共RtfText(文本字符串)
        :碱(文本)
    {
    }
}

公共类RtfControlWord:RtfObject
{
    公共RtfControlWord(字符串名称)
        :基地(名)
    {
    }
}
 

I have rtf documents that include an embedded object (an image). I need to extract this as an Image object (or any other usable format). I have checked out this CodeProject article but the default apps don't render it correctly (They render the 'default image' image, not the image itself), so I moved on.

Here is a sample of the RTF Code (I had to shorten it because of size):

{\rtf1\ansi\deff0{\fonttbl{\f0\fnil\fcharset0 MS Sans Serif;}}
\viewkind4\uc1\pard\lang1033\f0\fs18{\object\objemb{\*\objclass Package}\objw855\objh810{\*\objdata 
01050000
02000000
08000000
5061636b61676500
00000000
00000000
1f900000
02007369675f5f2e6a706700433a5c55736572735c726563657074696f6e5c4465736b746f705c
5369676e6174757265735c7369675f5f2e6a7067000000030034000000433a5c55736572735c52
45434550547e315c417070446174615c4c6f63616c5c54656d705c7369675f5f20283132292e6a
706700c18e0000ffd8ffe000104a46494600010101004800470000ffdb00430001010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101010101010101010101ffdb00430101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101ffc0001108012c03e803012200021101031101ffc4001f00010002030002
0301000000000000000000090a07080b050602030401ffc4003f10000006030001040201030301
04070900000203040506010708090a11121314152116172223314118192532591a24576598d6d8
2933384651788497b7ffc4001a010101000301010000000000000000000000030204050106ffc4
002b11010003010100020103030402030000000002030401051112130614211522230731415124
32536162ffda000c03010002110311003f00bfc000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
...
005c0072006500630065007000740069006f006e005c004400650073006b0074006f0070005c00
5300690067006e006100740075007200650073005c007300690067005f005f002e006a00700067
00
01050000
00000000
}{\result{\pict\wmetafile8\picw2010\pich1905\picwgoal855\pichgoal810 
0100090000033b0700000200210600000000050000000b0200000000050000000c02350038001c
000000fb02f4ff000000000000900100000001000000005365676f65205549000e0a52104c2308
00dd1900d894ef758001f3758d0e664a040000002d010000050000000902000000000500000001
02ffffff00a5000000410bc600880020002000000000002000200000000c002800000020000000
400000000100010000000000000100000000000000000000000000000000000000000000ffffff
...
0021001c001c000000fb021000070000000000bc02000000000102022253797374656d00008d0e
664a00000a0022008a0100000000ffffffff8cdd1900040000002d010100030000000000
}}}\par
}

解决方案

Here is a piece of code that can extract all objects ('Package' class objects) from an RTF stream:

    /// <summary>
    /// Scans the RTF file at <paramref name="filePath"/> and writes every embedded
    /// object of class "Package" to a file in the current working directory.
    /// </summary>
    /// <param name="filePath">Path of the RTF document to read.</param>
    /// <remarks>
    /// Parse errors raised by <c>RtfReader</c> / <c>PackagedObject</c> (for example
    /// <see cref="NotSupportedException"/> for unsupported object formats) are not caught here.
    /// </remarks>
    public static void ExtractPackageObjects(string filePath)
    {
        using (StreamReader sr = new StreamReader(filePath))
        // IEnumerator<T> is IDisposable; disposing it lets the iterator clean up.
        using (IEnumerator<RtfObject> enumerator = new RtfReader(sr).Read().GetEnumerator())
        {
            while (enumerator.MoveNext())
            {
                if (enumerator.Current.Text != "object")
                    continue;

                if (!RtfReader.MoveToNextControlWord(enumerator, "objclass"))
                    continue;

                string className = RtfReader.GetNextText(enumerator);
                if (className != "Package")
                    continue;

                if (!RtfReader.MoveToNextControlWord(enumerator, "objdata"))
                    continue;

                byte[] data = RtfReader.GetNextTextAsByteArray(enumerator);
                if (data == null)
                    continue; // input ended before any object data was found

                using (MemoryStream objectData = new MemoryStream(data))
                using (MemoryStream packageData = new MemoryStream())
                {
                    RtfReader.ExtractObjectData(objectData, packageData);
                    packageData.Position = 0;
                    PackagedObject po = PackagedObject.Extract(packageData);

                    // The display name comes from the (untrusted) document. Keep only its
                    // file-name part so an embedded "..\..\x" or absolute path cannot make
                    // us write outside the current directory.
                    string fileName = Path.GetFileName(po.DisplayName);
                    File.WriteAllBytes(fileName, po.Data);
                }
            }
        }
    }

And here are the utility classes that this code uses. There is a simple stream-based RTF parser that lets you reach the control words of interest.

There is also a utility to extract data from a serialized Object Packager instance. Object Packager is an almost 20-year-old OLE 1.0 feature, and its serialized binary format is not documented (to my knowledge), but it is understandable.

This works fine on the sample you provided, but you may have to adapt it to your needs.

/// <summary>
/// Simple forward-only, stream-based RTF tokenizer. It turns the characters of a
/// <see cref="TextReader"/> into a sequence of <c>RtfControlWord</c> and <c>RtfText</c>
/// tokens, and offers static helpers to navigate that sequence and to unwrap an
/// OLE1.0 embedded object found in an <c>\objdata</c> destination.
/// </summary>
public class RtfReader
{
    /// <summary>Creates a tokenizer over <paramref name="reader"/>.</summary>
    /// <param name="reader">Source of RTF characters; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public RtfReader(TextReader reader)
    {
        if (reader == null)
            throw new ArgumentNullException("reader");

        Reader = reader;
    }

    /// <summary>The underlying character source. It is not disposed by this class.</summary>
    public TextReader Reader { get; private set; }

    /// <summary>
    /// Lazily tokenizes the input. CR and LF characters are ignored; control words and
    /// text runs are yielded in document order (their text is trimmed by <c>RtfObject</c>).
    /// </summary>
    /// <returns>A sequence of <c>RtfControlWord</c> and <c>RtfText</c> tokens.</returns>
    /// <exception cref="FormatException">A <c>\'hh</c> escape does not contain two hex digits.</exception>
    public IEnumerable<RtfObject> Read()
    {
        StringBuilder controlWord = new StringBuilder();
        StringBuilder text = new StringBuilder();
        Stack<RtfParseState> stack = new Stack<RtfParseState>();
        RtfParseState state = RtfParseState.Group;
        RtfObject token;

        while (true)
        {
            int i = Reader.Read();
            if (i < 0)
            {
                // End of input: emit whatever is still buffered, control word first.
                token = TakeControlWord(controlWord);
                if (token != null)
                    yield return token;

                token = TakeText(text);
                if (token != null)
                    yield return token;

                yield break;
            }

            char c = (char)i;

            // noise chars
            if (c == '\r' || c == '\n')
                continue;

            switch (state)
            {
                case RtfParseState.Group:
                    if (c == '{')
                        stack.Push(state);
                    else if (c == '\\')
                        state = RtfParseState.ControlWord;
                    break;

                case RtfParseState.ControlWord:
                    if (char.IsLetterOrDigit(c))
                    {
                        controlWord.Append(c);
                        break;
                    }

                    // Any other character terminates the current control word.
                    if (c == '{')
                    {
                        // a new group
                        state = RtfParseState.Group;
                    }
                    else if (c == '}')
                    {
                        // close group
                        state = stack.Count > 0 ? stack.Pop() : RtfParseState.Group;
                    }
                    else if (c != '\\')
                    {
                        // delimiter: what follows is text (a backslash instead starts
                        // another control word and leaves the state unchanged)
                        state = RtfParseState.Text;
                        text.Append(c);
                    }

                    token = TakeControlWord(controlWord);
                    if (token != null)
                        yield return token;
                    break;

                case RtfParseState.Text:
                    if (c == '\\')
                    {
                        state = RtfParseState.EscapedText;
                        break;
                    }

                    if (c == '{' || c == '}')
                    {
                        token = TakeText(text);
                        if (token != null)
                            yield return token;

                        if (c == '{')
                            state = RtfParseState.Group; // a new group
                        else
                            state = stack.Count > 0 ? stack.Pop() : RtfParseState.Group; // close group
                        break;
                    }

                    text.Append(c);
                    break;

                case RtfParseState.EscapedText:
                    if (c == '\\' || c == '}' || c == '{')
                    {
                        // escaped literal character
                        state = RtfParseState.Text;
                        text.Append(c);
                        break;
                    }

                    // ansi character escape: \'hh
                    if (c == '\'')
                    {
                        int hi = Reader.Read();
                        int lo = Reader.Read();

                        // If the input ends inside the escape, drop the incomplete escape;
                        // the next loop iteration sees end-of-input and flushes the buffers.
                        if (hi >= 0 && lo >= 0)
                            text.Append(FromHexa((char)hi, (char)lo));

                        // The escape is complete: what follows is ordinary text again.
                        // (Staying in EscapedText would misread the next character as
                        // an escaped literal or as the start of a control word.)
                        state = RtfParseState.Text;
                        break;
                    }

                    token = TakeText(text);
                    if (token != null)
                        yield return token;

                    // in fact, it's a normal controlWord
                    controlWord.Append(c);
                    state = RtfParseState.ControlWord;
                    break;
            }
        }
    }

    /// <summary>
    /// Advances <paramref name="enumerator"/> until the current token's text equals
    /// <paramref name="word"/>.
    /// </summary>
    /// <returns>true if such a token was found; false if the sequence ended first.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is null.</exception>
    public static bool MoveToNextControlWord(IEnumerator<RtfObject> enumerator, string word)
    {
        if (enumerator == null)
            throw new ArgumentNullException("enumerator");

        while (enumerator.MoveNext())
        {
            if (enumerator.Current.Text == word)
                return true;
        }
        return false;
    }

    /// <summary>
    /// Advances <paramref name="enumerator"/> to the next <c>RtfText</c> token and returns its text.
    /// </summary>
    /// <returns>The text, or null if the sequence ended before a text token was found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is null.</exception>
    public static string GetNextText(IEnumerator<RtfObject> enumerator)
    {
        if (enumerator == null)
            throw new ArgumentNullException("enumerator");

        while (enumerator.MoveNext())
        {
            RtfText text = enumerator.Current as RtfText;
            if (text != null)
                return text.Text;
        }
        return null;
    }

    /// <summary>
    /// Advances <paramref name="enumerator"/> to the next <c>RtfText</c> token and decodes
    /// its text as a sequence of two-digit hexadecimal bytes. White space between digits
    /// is ignored.
    /// </summary>
    /// <returns>The decoded bytes, or null if the sequence ended before a text token was found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is null.</exception>
    /// <exception cref="FormatException">
    /// The text contains a non-hexadecimal character or an odd number of hex digits.
    /// </exception>
    public static byte[] GetNextTextAsByteArray(IEnumerator<RtfObject> enumerator)
    {
        if (enumerator == null)
            throw new ArgumentNullException("enumerator");

        while (enumerator.MoveNext())
        {
            RtfText text = enumerator.Current as RtfText;
            if (text == null)
                continue;

            string hex = text.Text;
            List<byte> bytes = new List<byte>(hex.Length / 2);
            bool haveHigh = false;
            char high = '\0';
            foreach (char digit in hex)
            {
                if (char.IsWhiteSpace(digit))
                    continue;

                if (!haveHigh)
                {
                    high = digit;
                    haveHigh = true;
                    continue;
                }

                bytes.Add((byte)FromHexa(high, digit));
                haveHigh = false;
            }

            // A dangling nibble means the data is corrupt; report it as a format error
            // instead of indexing past the end of the string.
            if (haveHigh)
                throw new FormatException("Hexadecimal data contains an odd number of digits.");

            return bytes.ToArray();
        }
        return null;
    }

    // Extracts an EmbeddedObject/ObjectHeader from a stream
    // see [MS-OLEDS]: Object Linking and Embedding (OLE) Data Structures for more information
    // chapter 2.2: OLE1.0 Format Structures
    /// <summary>
    /// Reads an OLE1.0 object header from <paramref name="inputStream"/> and copies the
    /// native data that follows it to <paramref name="outputStream"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either stream is null.</exception>
    /// <exception cref="NotSupportedException">The FormatID is not 2 (EmbeddedObject).</exception>
    /// <exception cref="EndOfStreamException">The stream ends before the declared data is complete.</exception>
    public static void ExtractObjectData(Stream inputStream, Stream outputStream)
    {
        if (inputStream == null)
            throw new ArgumentNullException("inputStream");

        if (outputStream == null)
            throw new ArgumentNullException("outputStream");

        // Not disposed on purpose: disposing the BinaryReader would close the caller's stream.
        BinaryReader reader = new BinaryReader(inputStream);
        reader.ReadInt32(); // OLEVersion
        int formatId = reader.ReadInt32(); // FormatID
        if (formatId != 2) // see 2.2.4 Object Header. 2 means EmbeddedObject
            throw new NotSupportedException();

        ReadLengthPrefixedAnsiString(reader); // className
        ReadLengthPrefixedAnsiString(reader); // topicName
        ReadLengthPrefixedAnsiString(reader); // itemName

        int nativeDataSize = reader.ReadInt32();
        byte[] bytes = reader.ReadBytes(nativeDataSize);

        // ReadBytes returns fewer bytes when the stream ends early; do not pass on
        // silently truncated data.
        if (bytes.Length != nativeDataSize)
            throw new EndOfStreamException("Embedded object data is shorter than its declared size.");

        outputStream.Write(bytes, 0, bytes.Length);
    }

    // see chapter 2.1.4 LengthPrefixedAnsiString
    // Reads a 32-bit length followed by that many bytes and decodes all but the last
    // byte (the terminator in the sample data, e.g. "Package\0" with length 8).
    private static string ReadLengthPrefixedAnsiString(BinaryReader reader)
    {
        int length = reader.ReadInt32();
        if (length == 0)
            return string.Empty;

        byte[] bytes = reader.ReadBytes(length);
        if (bytes.Length != length)
            throw new EndOfStreamException("Length-prefixed string is shorter than its declared length.");

        return Encoding.Default.GetString(bytes, 0, length - 1);
    }

    // Returns a token for the buffered control word and empties the buffer,
    // or returns null (leaving the buffer untouched) when it holds nothing but white space.
    private static RtfObject TakeControlWord(StringBuilder buffer)
    {
        string value = buffer.ToString();
        if (string.IsNullOrWhiteSpace(value))
            return null;

        buffer.Clear();
        return new RtfControlWord(value);
    }

    // Same as TakeControlWord, but produces a text token.
    private static RtfObject TakeText(StringBuilder buffer)
    {
        string value = buffer.ToString();
        if (string.IsNullOrWhiteSpace(value))
            return null;

        buffer.Clear();
        return new RtfText(value);
    }

    private enum RtfParseState
    {
        ControlWord,
        Text,
        EscapedText,
        Group
    }

    // Converts two hexadecimal digits to the character with that code (0-255).
    // AllowHexSpecifier (rather than HexNumber) rejects white space, so " a" is an
    // error instead of silently parsing as 0x0A.
    private static char FromHexa(char hi, char lo)
    {
        string digits = new string(new[] { hi, lo });
        return (char)byte.Parse(digits, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
    }
}

// Utility class to parse an OLE1.0 OLEOBJECT
/// <summary>
/// Content of a serialized Object Packager ("Package") instance: the display name,
/// icon information, original file path and the raw bytes of the packaged file.
/// Instances are created with <see cref="Extract"/>.
/// </summary>
public class PackagedObject
{
    private PackagedObject()
    {
    }

    /// <summary>First string stored in the package; used by callers as the file name.</summary>
    public string DisplayName { get; private set; }

    /// <summary>Second string stored in the package (path of the icon file).</summary>
    public string IconFilePath { get; private set; }

    /// <summary>16-bit icon index stored after the icon path.</summary>
    public int IconIndex { get; private set; }

    /// <summary>Path string stored just before the file data.</summary>
    public string FilePath { get; private set; }

    /// <summary>Raw bytes of the packaged file.</summary>
    public byte[] Data { get; private set; }

    // Reads bytes up to (and consuming) the next zero byte and returns them as a string.
    // Each byte is mapped directly to the char with the same code, so bytes >= 0x80 are
    // not decoded through an ANSI code page.
    private static string ReadAnsiString(BinaryReader reader)
    {
        StringBuilder sb = new StringBuilder();
        while (true)
        {
            byte b = reader.ReadByte();
            if (b == 0)
                return sb.ToString();

            sb.Append((char)b);
        }
    }

    /// <summary>
    /// Parses a packaged file from <paramref name="inputStream"/>, starting at its current position.
    /// </summary>
    /// <param name="inputStream">Stream positioned at the start of the package data.</param>
    /// <returns>The parsed package.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputStream"/> is null.</exception>
    /// <exception cref="NotSupportedException">The package type is not 3 (file).</exception>
    /// <exception cref="EndOfStreamException">
    /// The stream ends before the header or the declared amount of file data has been read.
    /// </exception>
    public static PackagedObject Extract(Stream inputStream)
    {
        if (inputStream == null)
            throw new ArgumentNullException("inputStream");

        // Not disposed on purpose: disposing the BinaryReader would close the caller's stream.
        BinaryReader reader = new BinaryReader(inputStream);
        reader.ReadUInt16(); // sig
        PackagedObject po = new PackagedObject();
        po.DisplayName = ReadAnsiString(reader);
        po.IconFilePath = ReadAnsiString(reader);
        po.IconIndex = reader.ReadUInt16();
        int type = reader.ReadUInt16();
        if (type != 3) // 3 is file, 1 is link
            throw new NotSupportedException();

        reader.ReadInt32(); // nextsize
        po.FilePath = ReadAnsiString(reader);
        int dataSize = reader.ReadInt32();
        byte[] data = reader.ReadBytes(dataSize);

        // ReadBytes returns fewer bytes when the stream ends early; returning that
        // would hand the caller a silently truncated file.
        if (data.Length != dataSize)
            throw new EndOfStreamException("Packaged file data is shorter than its declared size.");

        po.Data = data;
        // note after that, there may be unicode + long path info
        return po;
    }
}

/// <summary>
/// Base type for the tokens produced while reading RTF: a piece of text stored
/// with leading and trailing white space removed.
/// </summary>
public class RtfObject
{
    /// <summary>Creates a token from <paramref name="text"/>, trimming it.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public RtfObject(string text)
    {
        if (text != null)
        {
            Text = text.Trim();
            return;
        }

        throw new ArgumentNullException("text");
    }

    /// <summary>The trimmed token text.</summary>
    public string Text { get; private set; }
}

/// <summary>An <see cref="RtfObject"/> token that represents a run of text.</summary>
public class RtfText : RtfObject
{
    /// <summary>Creates a text token; <paramref name="text"/> is passed unchanged to the base constructor.</summary>
    public RtfText(string text) : base(text) { }
}

/// <summary>An <see cref="RtfObject"/> token that represents a control word.</summary>
public class RtfControlWord : RtfObject
{
    /// <summary>Creates a control-word token; <paramref name="name"/> is passed unchanged to the base constructor.</summary>
    public RtfControlWord(string name) : base(name) { }
}