Skip to main content
Skip table of contents

Extracting objects from a PDF

Example written in answer to the question "How to extract embedded streams?"


extractstreams

JAVA

JAVA
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2023 Apryse Group NV
    Authors: Apryse Software.

    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.sandbox.parse;

import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Sample that walks every indirect object of a PDF and dumps the content of
 * each stream object to its own {@code extract_streams<N>.dat} file, where
 * {@code N} is the index used to look the object up in the document.
 */
public class ExtractStreams {
    /** Directory that receives the extracted stream files. */
    public static final String DEST = "./target/sandbox/parse";

    /** PDF whose streams are extracted. */
    public static final String SRC = "./src/main/resources/pdfs/image.pdf";

    /**
     * Creates the parent directory of {@link #DEST}.
     * Note that {@link #DEST} itself is created by {@link #main(String[])}.
     */
    public static void before() {
        new File(DEST).getParentFile().mkdirs();
    }

    /**
     * Entry point: makes sure the output directory exists and runs the extraction.
     *
     * @param args ignored
     * @throws IOException if the source PDF cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        File file = new File(DEST);
        file.mkdirs();

        new ExtractStreams().manipulatePdf(DEST);
    }

    /**
     * Writes the bytes of every stream object found in {@link #SRC} into {@code dest}.
     *
     * @param dest existing directory the {@code extract_streams<N>.dat} files are written to
     * @throws IOException if the source PDF cannot be read or an output file cannot be written
     */
    protected void manipulatePdf(String dest) throws IOException {
        // try-with-resources guarantees the document (and its reader) is closed
        // even when reading a stream or writing an output file fails.
        try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC))) {
            int numberOfPdfObjects = pdfDoc.getNumberOfPdfObjects();
            for (int i = 1; i <= numberOfPdfObjects; i++) {
                PdfObject obj = pdfDoc.getPdfObject(i);
                if (obj == null || !obj.isStream()) {
                    continue;
                }

                PdfStream stream = (PdfStream) obj;
                byte[] b;
                try {

                    // Get decoded stream bytes.
                    b = stream.getBytes();
                } catch (PdfException exc) {

                    // Decoding failed: fall back to the originally encoded stream bytes.
                    b = stream.getBytes(false);
                }

                // Build the path without passing 'dest' through String.format, so a '%'
                // in the directory name is not misread as a format specifier.
                File target = new File(dest, "extract_streams" + i + ".dat");
                try (FileOutputStream fos = new FileOutputStream(target)) {
                    fos.write(b);
                }
            }
        }
    }
}

C#

C#
using System;
using System.IO;
using iText.Kernel;
using iText.Kernel.Exceptions;
using iText.Kernel.Pdf;

namespace iText.Samples.Sandbox.Parse
{
    /// <summary>
    /// Sample that walks every indirect object of a PDF and dumps the content of
    /// each stream object to its own <c>extract_streams&lt;N&gt;.dat</c> file, where
    /// <c>N</c> is the index used to look the object up in the document.
    /// </summary>
    public class ExtractStreams
    {
        /// <summary>Directory that receives the extracted stream files.</summary>
        public static readonly String DEST = "results/sandbox/parse";

        /// <summary>PDF whose streams are extracted.</summary>
        public static readonly String SRC = "../../../resources/pdfs/image.pdf";

        /// <summary>
        /// Entry point: makes sure the output directory exists and runs the extraction.
        /// </summary>
        /// <param name="args">Ignored.</param>
        public static void Main(String[] args)
        {
            Directory.CreateDirectory(DEST);

            new ExtractStreams().ManipulatePdf(DEST);
        }

        /// <summary>
        /// Writes the bytes of every stream object found in <see cref="SRC"/> into
        /// <paramref name="dest"/>.
        /// </summary>
        /// <param name="dest">
        /// Existing directory the <c>extract_streams&lt;N&gt;.dat</c> files are written to.
        /// </param>
        protected void ManipulatePdf(String dest)
        {
            PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC));
            try
            {
                int numberOfPdfObject = pdfDoc.GetNumberOfPdfObjects();
                for (int i = 1; i <= numberOfPdfObject; i++)
                {
                    PdfObject obj = pdfDoc.GetPdfObject(i);
                    if (obj == null || !obj.IsStream())
                    {
                        continue;
                    }

                    PdfStream stream = (PdfStream) obj;
                    byte[] b;
                    try
                    {
                        // Get decoded stream bytes.
                        b = stream.GetBytes();
                    }
                    catch (PdfException)
                    {
                        // Decoding failed: fall back to the originally encoded stream bytes.
                        b = stream.GetBytes(false);
                    }

                    // Build the path without passing 'dest' through String.Format, so braces
                    // in the directory name are not misread as format placeholders.
                    String target = Path.Combine(dest, "extract_streams" + i + ".dat");
                    using (FileStream fos = new FileStream(target, FileMode.Create))
                    {
                        fos.Write(b, 0, b.Length);
                    }
                }
            }
            finally
            {
                // Close the document even when extraction fails, so the source file handle is released.
                pdfDoc.Close();
            }
        }
    }
}
JavaScript errors detected

Please note, these errors can depend on your browser setup.

If this problem persists, please contact our support.