Extracting objects from a PDF
Example written in answer to the question: How to extract embedded streams?
extractstreams
JAVA
JAVA
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Authors: Apryse Software.
For more information, please contact iText Software at this address:
sales@itextpdf.com
*/
package com.itextpdf.samples.sandbox.parse;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Sample that writes every stream object found in a PDF to its own file.
 *
 * <p>The document at {@link #SRC} is opened, each object number from 1 up to
 * {@code getNumberOfPdfObjects()} is looked up, and every object that reports
 * itself as a stream is written to {@code extract_streams<number>.dat} inside
 * the destination directory.
 */
public class ExtractStreams {
    /** Directory that receives the extracted stream files. */
    public static final String DEST = "./target/sandbox/parse";

    /** Path of the PDF whose streams are extracted. */
    public static final String SRC = "./src/main/resources/pdfs/image.pdf";

    /**
     * Creates the output directory {@link #DEST}, including any missing parents.
     */
    public static void before() {
        // DEST is itself the output directory, so create it rather than only its parent;
        // otherwise opening the output files in manipulatePdf would fail.
        new File(DEST).mkdirs();
    }

    /**
     * Entry point: prepares the output directory and runs the extraction.
     *
     * @param args unused
     * @throws IOException if the source PDF cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        before();
        new ExtractStreams().manipulatePdf(DEST);
    }

    /**
     * Writes the bytes of every stream object of {@link #SRC} into {@code dest}.
     *
     * @param dest existing directory in which the {@code extract_streams<number>.dat} files are created
     * @throws IOException if the source PDF cannot be read or an output file cannot be written
     */
    protected void manipulatePdf(String dest) throws IOException {
        // try-with-resources closes the document even when reading or writing a stream fails.
        try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC))) {
            int numberOfPdfObjects = pdfDoc.getNumberOfPdfObjects();
            for (int i = 1; i <= numberOfPdfObjects; i++) {
                PdfObject obj = pdfDoc.getPdfObject(i);
                if (obj == null || !obj.isStream()) {
                    continue;
                }

                PdfStream stream = (PdfStream) obj;
                byte[] b;
                try {
                    // Get decoded stream bytes.
                    b = stream.getBytes();
                } catch (PdfException exc) {
                    // Decoding failed: fall back to the originally encoded stream bytes.
                    b = stream.getBytes(false);
                }

                // dest is passed as a format argument, not spliced into the pattern,
                // so a '%' in the directory name cannot be misread as a format specifier.
                String target = String.format("%s/extract_streams%s.dat", dest, i);
                try (FileOutputStream fos = new FileOutputStream(target)) {
                    fos.write(b);
                }
            }
        }
    }
}
C#
C#
using System;
using System.IO;
using iText.Kernel;
using iText.Kernel.Exceptions;
using iText.Kernel.Pdf;
namespace iText.Samples.Sandbox.Parse
{
/// <summary>
/// Sample that writes every stream object found in a PDF to its own file.
/// The document at <see cref="SRC"/> is opened, each object number from 1 up to
/// <c>GetNumberOfPdfObjects()</c> is looked up, and every object that reports itself
/// as a stream is written to <c>extract_streams&lt;number&gt;.dat</c> in the destination directory.
/// </summary>
public class ExtractStreams
{
    /// <summary>Directory that receives the extracted stream files.</summary>
    public static readonly String DEST = "results/sandbox/parse";

    /// <summary>Path of the PDF whose streams are extracted.</summary>
    public static readonly String SRC = "../../../resources/pdfs/image.pdf";

    /// <summary>Entry point: creates the output directory and runs the extraction.</summary>
    /// <param name="args">Unused.</param>
    public static void Main(String[] args)
    {
        Directory.CreateDirectory(DEST);
        new ExtractStreams().ManipulatePdf(DEST);
    }

    /// <summary>
    /// Writes the bytes of every stream object of <see cref="SRC"/> into <paramref name="dest"/>.
    /// </summary>
    /// <param name="dest">Existing directory in which the output files are created.</param>
    protected void ManipulatePdf(String dest)
    {
        // The using statement disposes the document even when reading or writing a stream throws.
        using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC)))
        {
            int numberOfPdfObjects = pdfDoc.GetNumberOfPdfObjects();
            for (int i = 1; i <= numberOfPdfObjects; i++)
            {
                PdfObject obj = pdfDoc.GetPdfObject(i);
                if (obj == null || !obj.IsStream())
                {
                    continue;
                }

                PdfStream stream = (PdfStream) obj;
                byte[] b;
                try
                {
                    // Get decoded stream bytes.
                    b = stream.GetBytes();
                }
                catch (PdfException)
                {
                    // Decoding failed: fall back to the originally encoded stream bytes.
                    b = stream.GetBytes(false);
                }

                // dest is passed as a format argument, not spliced into the format string,
                // so braces in the directory name cannot cause a FormatException.
                String target = String.Format("{0}/extract_streams{1}.dat", dest, i);
                using (FileStream fos = new FileStream(target, FileMode.Create))
                {
                    fos.Write(b, 0, b.Length);
                }
            }
        }
    }
}
}