开发者

pdf content stream parsing

开发者 https://www.devze.com 2023-03-25 05:18 出处:网络
I need help parsing a PDF. The PDF was built in Illustrator and has 4 layers, and each layer has one graphic path object.

I need help parsing a PDF. The PDF was built in Illustrator and has 4 layers, and each layer has one graphic path object. What I want to do is get all 4 graphic paths and draw them in another PDF file that has the same width and height as this PDF, and I want to draw them in the same positions. This is the code I started to write:

    /// <summary>
    /// Splits the content stream of <paramref name="page"/> into one
    /// <see cref="PDFMask"/> per entry of the page's /Resources /Properties
    /// dictionary. Each mask receives the bytes from the current read position
    /// up to and including the next "EMC " marker.
    /// </summary>
    /// <param name="page">Page whose /Contents is a single stream dictionary.</param>
    /// <returns>
    /// The masks in the order of the /Properties keys. Fewer masks than keys are
    /// returned when the stream runs out of "EMC " markers.
    /// </returns>
    /// <remarks>
    /// NOTE(review): the i-th key is paired with the i-th "EMC "-terminated
    /// section purely by position; confirm that the /Properties key order matches
    /// the order of the marked-content sections in the stream.
    /// </remarks>
    public static List<PDFMask> GetMasksFromPage(PdfPage page)
    {
        List<PDFMask> masks = new List<PDFMask>();

        PdfDictionary contents = page.Elements.GetDictionary("/Contents");
        PdfDictionary.PdfStream contentsStream = contents.Stream;

        PdfDictionary resources = page.Elements.GetDictionary("/Resources");
        PdfDictionary properties = resources.Elements.GetDictionary("/Properties");
        PdfName[] keys = properties.Elements.KeyNames;

        // Read the stream bytes and build the marker once instead of on every iteration.
        byte[] content = contentsStream.UnfilteredValue;
        byte[] endMarker = Encoding.ASCII.GetBytes("EMC ");

        int dataStartPointer = 0;
        for (int i = 0; i < keys.Length; i++)
        {
            if (dataStartPointer >= content.Length)
            {
                break;
            }

            // Assumes Utils.Bytes.IndexOf returns a negative value when the marker is absent.
            int dataEndPointer = Utils.Bytes.IndexOf(content, endMarker, dataStartPointer);
            if (dataEndPointer < 0)
            {
                break;
            }

            // The section includes the marker itself.
            int dataCount = dataEndPointer + endMarker.Length - dataStartPointer;

            PdfDictionary mc = properties.Elements.GetDictionary(keys[i].Value);

            PDFMask mask = new PDFMask();
            mask.name = mc.Elements.GetString("/Title");
            mask.key = keys[i].Value;

            // Copy into the local buffer; mask.data is not assigned until parseData runs.
            byte[] data = new byte[dataCount];
            Array.Copy(content, dataStartPointer, data, 0, dataCount);

            mask.parseData(data);
            masks.Add(mask);

            // Skip the single end-of-line byte that follows "EMC ".
            dataStartPointer += dataCount + 1;
        }

        return masks;
    }

The code above is used to get the data of all the layers and separate it into 4 objects.

     PdfDictionary.PdfStream contentsStream = contents.Stream;

This line gives me the binary graphics data of the 4 layers. Now, this is the PDFMask class that represents one layer:

/// <summary>
/// Holds the raw content-stream bytes of a single layer together with the
/// layer's title and its /Properties key.
/// </summary>
public class PDFMask
{
    /// <summary>Layer title.</summary>
    public string name;

    /// <summary>Key of the layer's entry in the page's /Properties dictionary.</summary>
    public string key;

    /// <summary>Raw content-stream bytes last handed to <see cref="parseData"/>.</summary>
    public byte[] data;

    /// <summary>
    /// Stores <paramref name="data"/> on this mask without interpreting it.
    /// </summary>
    /// <param name="data">Content-stream bytes belonging to this layer.</param>
    public void parseData(byte[] data)
    {
        // TODO: open question - how to parse these bytes into some XGraphics object?
        this.data = data;
    }
}

Now this is what the data source looks like:

   /Layer /MC0 BDC 
   0.75 0.68 0.67 0.902 k
   /GS0 gs
   q 1 0 0 1 396.4473 1835.6143 cm
   0 0 m
   76.497 -132.515 l
   -17.184 -159.051 l
   76.496 -185.607 l
   -0.003 -318.119 l
   -72.563 -252.047 l
   -50.486 -349.178 l
   -202.179 -349.182 l
   -180.097 -252.046 l
   -252.658 -318.116 l
   -329.154 -185.603 l
   -235.473 -159.048 l
   -329.154 -132.511 l
   -252.654 0.002 l
   -180.094 -66.07 l
   -202.175 31.087 l
   -50.482 31.081 l
   -72.563 -66.072 l
   h
   f
   Q
   EMC 

I am looking for a parser (I would prefer a PDFsharp parser) that can parse this data into some graphic object that I could use in another PDF document.


OK, what I did to solve this was to build my own parser for my own needs. I will display the code here; I am sure it will help someone someday...

/// <summary>
/// One path-construction directive read from a content-stream line.
/// </summary>
public struct GD
{
    /// <summary>First coordinate operand for 'm'/'l'; fifth operand for 'c'.</summary>
    public double x;

    /// <summary>Second coordinate operand for 'm'/'l'; sixth operand for 'c'.</summary>
    public double y;

    /// <summary>First operand of a 'c' directive; left at 0 otherwise.</summary>
    public double a;

    /// <summary>Second operand of a 'c' directive; left at 0 otherwise.</summary>
    public double b;

    /// <summary>Third operand of a 'c' directive; left at 0 otherwise.</summary>
    public double c;

    /// <summary>Fourth operand of a 'c' directive; left at 0 otherwise.</summary>
    public double d;

    /// <summary>Operator character of the line ('m', 'l' or 'c') stored as a byte.</summary>
    public byte t;
}
/// <summary>
/// The six numeric operands of a "q ... cm" content-stream line, stored in the
/// order they appear on the line.
/// </summary>
/// <remarks>
/// NOTE(review): in the PDF specification the cm operands are the matrix
/// entries a b c d e f, which would put the translation (e, f) into
/// <see cref="xAxis"/>/<see cref="yAxis"/> rather than locX/locY - confirm
/// against the code that consumes this struct.
/// </remarks>
public struct Coordinate
{
    /// <summary>First numeric operand.</summary>
    public double locX;

    /// <summary>Second numeric operand.</summary>
    public double locY;

    /// <summary>Third numeric operand.</summary>
    public double oriX;

    /// <summary>Fourth numeric operand.</summary>
    public double oriY;

    /// <summary>Fifth numeric operand.</summary>
    public double xAxis;

    /// <summary>Sixth numeric operand.</summary>
    public double yAxis;
}

/// <summary>
/// One layer of a page: its title, its /Properties key, the raw content-stream
/// bytes, and the path directives and "q ... cm" operands parsed from them.
/// </summary>
public class PDFMask
{
    private string _name;

    /// <summary>
    /// Text of the assigned name starting at the first '{', when one was
    /// present. Left untouched when a name without '{' is assigned.
    /// </summary>
    public string fun;

    /// <summary>
    /// Layer name. On assignment, everything from the first '{' onwards is
    /// split off into <see cref="fun"/> and only the leading part is kept.
    /// </summary>
    public string name
    {
        get
        {
            return _name;
        }
        set
        {
            // A null title must not crash the setter; it is stored as-is.
            int brace = value == null ? -1 : value.IndexOf('{');
            if (brace >= 0)
            {
                _name = value.Substring(0, brace);
                fun = value.Substring(brace);
            }
            else
            {
                _name = value;
            }
        }
    }

    /// <summary>Key of this layer's entry in the page's /Properties dictionary.</summary>
    public string key;

    /// <summary>Raw bytes last passed to <see cref="parseData"/>.</summary>
    public byte[] data;

    /// <summary>Directives collected by <see cref="parseData"/>, in stream order.</summary>
    public GD[] graphicsDirectives;

    /// <summary>Operands of the last "q ... cm" line seen by <see cref="parseData"/>.</summary>
    public Coordinate coordinate;

    /// <summary>
    /// Scans <paramref name="data"/> line by line and collects every line that
    /// ends in " m", " l" or " c" as a <see cref="GD"/>, and every line that
    /// starts with 'q' and ends in "cm" as the <see cref="coordinate"/>.
    /// </summary>
    /// <param name="data">Content-stream bytes of this layer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="FormatException">A matched line holds a non-numeric operand.</exception>
    /// <exception cref="IndexOutOfRangeException">A matched line has too few operands.</exception>
    public void parseData(byte[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        this.data = data;

        // A growable list replaces the fixed 100-slot array, which overflowed on longer paths.
        System.Collections.Generic.List<GD> directives = new System.Collections.Generic.List<GD>();

        // Lines are sliced directly out of data, so line length is not capped either.
        int lineStart = 0;
        for (int i = 0; i <= data.Length; i++)
        {
            // i == data.Length flushes a final line that has no terminator.
            // '\r' counts as a terminator too, so CRLF and CR line endings work.
            bool atTerminator = i == data.Length || data[i] == '\n' || data[i] == '\r';
            if (!atTerminator)
            {
                continue;
            }

            parseLine(data, lineStart, i - lineStart, directives);
            lineStart = i + 1;
        }

        graphicsDirectives = directives.ToArray();
    }

    /// <summary>
    /// Parses the first <paramref name="count"/> bytes of <paramref name="bytes"/>
    /// as a "q n1 n2 n3 n4 n5 n6 cm" line.
    /// </summary>
    /// <param name="bytes">Buffer holding the ASCII line, starting at index 0.</param>
    /// <param name="count">Number of bytes in the line.</param>
    /// <returns>The six numeric operands, in line order.</returns>
    public Coordinate parseDataWriteCoordinate(byte[] bytes, int count)
    {
        return parseCoordinate(splitOperands(bytes, 0, count));
    }

    /// <summary>
    /// Parses the first <paramref name="count"/> bytes of <paramref name="bytes"/>
    /// as an 'm', 'l' or 'c' line.
    /// </summary>
    /// <param name="bytes">Buffer holding the ASCII line, starting at index 0.</param>
    /// <param name="count">Number of bytes in the line.</param>
    /// <returns>The directive with its operator character and operands.</returns>
    public GD parseDataWriteGD(byte[] bytes, int count)
    {
        return parseGD(splitOperands(bytes, 0, count));
    }

    // Classifies one line of the stream and records it if it is a recognised directive.
    private void parseLine(byte[] bytes, int offset, int count, System.Collections.Generic.List<GD> directives)
    {
        if (count <= 2)
        {
            return;
        }

        byte last = bytes[offset + count - 1];
        byte beforeLast = bytes[offset + count - 2];

        if (beforeLast == ' ' && (last == 'c' || last == 'l' || last == 'm'))
        {
            directives.Add(parseGD(splitOperands(bytes, offset, count)));
        }
        else if (count > 3 && bytes[offset] == 'q' && last == 'm' && beforeLast == 'c')
        {
            coordinate = parseCoordinate(splitOperands(bytes, offset, count));
        }
    }

    // Decodes a byte range as ASCII and splits it into space-separated tokens.
    private static string[] splitOperands(byte[] bytes, int offset, int count)
    {
        return Encoding.ASCII.GetString(bytes, offset, count)
            .Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // Content-stream numbers always use '.', so parse them independent of the thread culture.
    private static double parseNumber(string text)
    {
        return double.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
    }

    // Builds a Coordinate from the tokens of a "q ... cm" line; token 0 is the 'q'.
    private static Coordinate parseCoordinate(string[] values)
    {
        Coordinate c = new Coordinate();
        c.locX = parseNumber(values[1]);
        c.locY = parseNumber(values[2]);
        c.oriX = parseNumber(values[3]);
        c.oriY = parseNumber(values[4]);
        c.xAxis = parseNumber(values[5]);
        c.yAxis = parseNumber(values[6]);
        return c;
    }

    // Builds a GD from the tokens of an 'm', 'l' or 'c' line; the last token is the operator.
    private static GD parseGD(string[] values)
    {
        GD gd = new GD();
        gd.t = (byte)values[values.Length - 1][0];

        if (gd.t == 'c')
        {
            gd.a = parseNumber(values[0]);
            gd.b = parseNumber(values[1]);
            gd.c = parseNumber(values[2]);
            gd.d = parseNumber(values[3]);
            gd.x = parseNumber(values[4]);
            gd.y = parseNumber(values[5]);
        }
        else
        {
            gd.x = parseNumber(values[0]);
            gd.y = parseNumber(values[1]);
        }

        return gd;
    }
}
0

精彩评论

暂无评论...
验证码 换一张
取 消