Skip to main content
Skip table of contents

pdfOCR: Custom session options for an ONNX model

The release of pdfOCR 5.0.0 introduced support for pretrained ONNX PaddleOCR and EasyOCR models, adding to the docTR models already supported.

The following code sample shows how to customize the ONNX Runtime session configuration that the pdfOCR ONNX-based engine uses when creating searchable PDFs from images.

Check the comments in the example for more details on customizing session options.

Compatible PaddleOCR/EasyOCR models already converted to ONNX format are available from our Hugging Face repository.

Java

JAVA
package com.itextpdf.samples.sandbox.pdfocr.onnx;

import ai.onnxruntime.OrtEnvironment;
import ai.onnxruntime.OrtException;
import ai.onnxruntime.OrtProvider;
import ai.onnxruntime.OrtSession;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.pdfocr.OcrPdfCreator;
import com.itextpdf.pdfocr.onnx.IOrtSessionOptionsCreator;
import com.itextpdf.pdfocr.onnx.OnnxOcrEngine;
import com.itextpdf.pdfocr.onnx.detection.IDetectionPredictor;
import com.itextpdf.pdfocr.onnx.detection.OnnxDetectionPredictor;
import com.itextpdf.pdfocr.onnx.recognition.IRecognitionPredictor;
import com.itextpdf.pdfocr.onnx.recognition.OnnxRecognitionPredictor;

import java.io.File;
import java.util.Collections;

/**
 * CustomOnnxRuntimeSessionOptionsExample.java
 *
 * <p>
 * Shows how to supply custom {@link ai.onnxruntime.OrtSession.SessionOptions}
 * for building the {@link OrtSession} that wraps an ONNX model and allows inference calls.
 * Through these options you can choose whether OCR runs on GPU or CPU and set the execution mode,
 * the optimization level and other settings.
 *
 * <p>
 * To run models on GPU, add the pdfocr-onnx-abstract and onnxruntime_gpu dependencies.
 * {@link com.itextpdf.pdfocr.onnx.DefaultOrtSessionOptionsCreator} supports GPU mode by default,
 * so a custom creator is only required when you want to set up custom options.
 *
 * <p>
 * Required software: iText 9.6.0, pdfOCR-Onnx 5.0.0
 * (pdfocr-onnx-cpu dependency to execute ONNX models on CPU or
 * pdfocr-onnx-abstract and onnxruntime_gpu dependencies to execute ONNX models on GPU).
 */
public class CustomOnnxRuntimeSessionOptionsExample {
    public static final String DEST = "./target/sandbox/pdfocr/onnx/CustomOnnxRuntimeSessionOptionsExample/result.pdf";

    private static final String BASIC_IMAGE = "./src/main/resources/img/ocrExample.png";

    private static final String MODELS = "./src/main/resources/models/paddleocr/";
    private static final String DET = MODELS + "PP-OCRv5_mobile_det_infer";
    private static final String REC = MODELS + "PP-OCRv5_mobile_rec_infer";

    /**
     * Entry point: creates the output directory and runs the sample against {@link #DEST}.
     *
     * @param args command line arguments (not used)
     *
     * @throws Exception if the sample fails
     */
    public static void main(String[] args) throws Exception {
        // Make sure the directory of the resulting PDF exists before anything is written.
        new File(DEST).getParentFile().mkdirs();

        CustomOnnxRuntimeSessionOptionsExample example = new CustomOnnxRuntimeSessionOptionsExample();
        example.manipulate(DEST);
    }

    /**
     * Runs OCR on the sample image with predictors built from custom session options
     * and writes the resulting PDF.
     *
     * @param destination path of the PDF file to create
     *
     * @throws Exception if predictor creation, OCR or PDF writing fails
     */
    protected void manipulate(String destination) throws Exception {
        // Both predictors share the same custom IOrtSessionOptionsCreator.
        IOrtSessionOptionsCreator optionsCreator = new CustomOrtSessionOptionsCreator();
        IDetectionPredictor detector = OnnxDetectionPredictor.paddleOcr(DET, optionsCreator);
        IRecognitionPredictor recognizer = OnnxRecognitionPredictor.paddleOcr(REC, optionsCreator);

        // OnnxOcrEngine shall be closed after usage to avoid native allocations leak.
        // Closing it also closes all predictors used for its creation.
        try (OnnxOcrEngine engine = new OnnxOcrEngine(detector, recognizer)) {
            OcrPdfCreator creator = new OcrPdfCreator(engine);
            File inputImage = new File(BASIC_IMAGE);
            PdfWriter writer = new PdfWriter(destination);
            creator.createPdf(Collections.singletonList(inputImage), writer).close();
        }
    }

    /**
     * Implementation of {@link IOrtSessionOptionsCreator}.
     *
     * <p>
     * {@code CUDA} execution provider is added if available, otherwise default {@code CPU} execution provider is used.
     */
    public static class CustomOrtSessionOptionsCreator implements IOrtSessionOptionsCreator {
        /**
         * Builds session options with a CUDA or CPU execution provider,
         * sequential execution mode and all optimizations enabled.
         *
         * @return the configured session options
         *
         * @throws OrtException if configuring the options fails
         */
        @Override
        public OrtSession.SessionOptions create() throws OrtException {
            final OrtSession.SessionOptions options = new OrtSession.SessionOptions();
            try {
                final boolean cudaAvailable = OrtEnvironment.getAvailableProviders().contains(OrtProvider.CUDA);
                if (!cudaAvailable) {
                    options.addCPU(true);
                    // NOTE(review): -1 presumably lets ONNX Runtime choose the thread counts - confirm.
                    options.setIntraOpNumThreads(-1);
                    options.setInterOpNumThreads(-1);
                } else {
                    // Use CUDA provider to run OCR on GPU.
                    options.addCUDA();
                }
                options.setExecutionMode(OrtSession.SessionOptions.ExecutionMode.SEQUENTIAL);
                options.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.ALL_OPT);
            } catch (Exception failure) {
                // Close the partially configured options before propagating the failure.
                options.close();
                throw failure;
            }
            return options;
        }
    }
}

C#

C#
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using iText.Kernel.Pdf;
using iText.Pdfocr;
using iText.Pdfocr.Onnx;
using iText.Pdfocr.Onnx.Detection;
using iText.Pdfocr.Onnx.Recognition;

namespace iText.Samples.Sandbox.Pdfocr.Onnx {
    /// <summary>CustomOnnxRuntimeSessionOptionsExample.cs</summary>
    /// <remarks>
    /// This example demonstrates how to provide custom
    /// <see cref="Microsoft.ML.OnnxRuntime.SessionOptions"/>
    /// used to construct
    /// <see cref="Microsoft.ML.OnnxRuntime.InferenceSession"/>
    /// which wraps an ONNX model and allows inference calls.
    /// This makes it possible to specify whether to run OCR on GPU or CPU,
    /// the execution mode, the optimization level and other options.
    /// <para />
    /// In order to run models on GPU, add itext.pdfocr.onnx.abstract and Microsoft.ML.OnnxRuntime.Gpu dependencies.
    /// <see cref="iText.Pdfocr.Onnx.DefaultOrtSessionOptionsCreator"/>
    /// supports GPU mode by default,
    /// so no additional changes are required unless you want to set up some custom options.
    /// <para />
    /// Required software: iText 9.6.0, pdfOCR-Onnx 5.0.0
    /// (itext.pdfocr.onnx.cpu dependency to execute ONNX models on CPU or
    /// itext.pdfocr.onnx.abstract and Microsoft.ML.OnnxRuntime.Gpu dependencies to execute ONNX models on GPU).
    /// </remarks>
    public class CustomOnnxRuntimeSessionOptionsExample {
        public const String DEST = "results/sandbox/pdfocr/onnx/CustomOnnxRuntimeSessionOptionsExample/result.pdf";

        private const String BASIC_IMAGE = "../../../resources/img/ocrExample.png";

        private const String MODELS = "../../../resources/models/paddleocr/";

        private const String DET = MODELS + "PP-OCRv5_mobile_det_infer";

        private const String REC = MODELS + "PP-OCRv5_mobile_rec_infer";

        /// <summary>Creates the output directory and runs the sample against <see cref="DEST"/>.</summary>
        /// <param name="args">command line arguments (not used)</param>
        public static void Main(String[] args) {
            FileInfo file = new FileInfo(DEST);
            file.Directory.Create();

            new CustomOnnxRuntimeSessionOptionsExample().Manipulate(DEST);
        }

        /// <summary>
        /// Runs OCR on the sample image with predictors built from custom session options
        /// and writes the resulting PDF.
        /// </summary>
        /// <param name="destination">path of the PDF file to create</param>
        protected internal virtual void Manipulate(String destination) {
            // Create custom IOrtSessionOptionsCreator and use it to create predictors.
            IOrtSessionOptionsCreator sessionOptionsCreator = new CustomOrtSessionOptionsCreator();

            IDetectionPredictor detectionPredictor = OnnxDetectionPredictor.PaddleOcr(DET, sessionOptionsCreator);
            IRecognitionPredictor recognitionPredictor = OnnxRecognitionPredictor.PaddleOcr(REC, sessionOptionsCreator);

            // OnnxOcrEngine shall be closed after usage to avoid native allocations leak.
            // It will also close all predictors used for its creation.
            using (OnnxOcrEngine ocrEngine = new OnnxOcrEngine(detectionPredictor, recognitionPredictor)) {
                OcrPdfCreator pdfCreator = new OcrPdfCreator(ocrEngine);
                pdfCreator.CreatePdf(new List<FileInfo> { new FileInfo(BASIC_IMAGE) }, new PdfWriter(destination))
                    .Close();
            }
        }

        /// <summary>
        /// Implementation of
        /// <see cref="iText.Pdfocr.Onnx.IOrtSessionOptionsCreator"/>.
        /// </summary>
        /// <remarks>
        /// <c>CUDA</c>
        /// execution provider is added if available, otherwise default
        /// <c>CPU</c>
        /// execution provider is used.
        /// </remarks>
        public class CustomOrtSessionOptionsCreator : IOrtSessionOptionsCreator {
            /// <summary>
            /// Builds session options with a CUDA or CPU execution provider,
            /// sequential execution mode and all graph optimizations enabled.
            /// </summary>
            /// <returns>the configured session options</returns>
            public virtual SessionOptions Create() {
                SessionOptions ortOptions = new SessionOptions();
                try {
                    if (OrtEnv.Instance().GetAvailableProviders().Contains("CUDAExecutionProvider")) {
                        // Use CUDA provider (device 0) to run OCR on GPU.
                        ortOptions.AppendExecutionProvider_CUDA(0);
                    }
                    else {
                        ortOptions.AppendExecutionProvider_CPU();
                        // NOTE(review): -1 presumably lets ONNX Runtime choose the thread counts - confirm.
                        ortOptions.IntraOpNumThreads = -1;
                        ortOptions.InterOpNumThreads = -1;
                    }
                    ortOptions.ExecutionMode = ExecutionMode.ORT_SEQUENTIAL;
                    ortOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
                    return ortOptions;
                } catch (Exception) {
                    // Dispose the partially configured options before propagating the failure;
                    // the bare rethrow keeps the original stack trace.
                    ortOptions.Dispose();
                    throw;
                }
            }
        }
    }
}
JavaScript errors detected

Please note, these errors can depend on your browser setup.

If this problem persists, please contact our support.