Skip to main content
Skip table of contents

pdfHTML: Accessible PDF Creation

A simple example showing how to create a tagged PDF with pdfHTML and how to configure the metadata required to obtain an accessible PDF.


createaccessiblepdf

JAVA

JAVA
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2023 Apryse Group NV
    Authors: Apryse Software.

    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.sandbox.pdfhtml;

import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.kernel.pdf.WriterProperties;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfViewerPreferences;
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.samples.sandbox.pdfhtml.headertagging.AccessibilityTagWorkerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Sample that converts an HTML file into a tagged PDF and sets the document language,
 * XMP metadata, viewer preferences and document info entries along the way.
 */
public class CreateAccessiblePDF {
    /** Resource directory: holds the HTML source and is also used as font directory and base URI. */
    public static final String SRC = "./src/main/resources/pdfhtml/AccessiblePDF/";
    /** Output path that {@link #main(String[])} passes to {@link #manipulatePdf(String, String)}. */
    public static final String DEST = "./target/sandbox/pdfhtml/Accessibility.pdf";

    /**
     * Creates the output directory and converts {@code Accessibility.html} from {@link #SRC}
     * into {@link #DEST}.
     *
     * @param args unused
     * @throws IOException if the source cannot be read or the destination cannot be written
     */
    public static void main(String[] args) throws IOException {
        File file = new File(DEST);
        file.getParentFile().mkdirs();
        String htmlSource = SRC + "Accessibility.html";

        new CreateAccessiblePDF().manipulatePdf(htmlSource, DEST);
    }

    /**
     * Converts the HTML file at {@code src} into a tagged PDF written to {@code dest}.
     *
     * @param src  path of the HTML file to convert
     * @param dest path of the PDF file to create
     * @throws IOException if the source cannot be opened or the destination cannot be written
     */
    public void manipulatePdf(String src, String dest) throws IOException {
        WriterProperties writerProperties = new WriterProperties();
        writerProperties.addXmpMetadata();

        // The source is opened first so that a missing HTML file does not leave an empty PDF behind.
        // All three resources are released in reverse order even when the conversion throws.
        try (FileInputStream htmlStream = new FileInputStream(src);
             FileOutputStream outputStream = new FileOutputStream(dest);
             PdfDocument pdfDoc = new PdfDocument(new PdfWriter(outputStream, writerProperties))) {
            pdfDoc.getCatalog().setLang(new PdfString("en-US"));

            pdfDoc.setTagged();
            pdfDoc.getCatalog().setViewerPreferences(new PdfViewerPreferences().setDisplayDocTitle(true));

            PdfDocumentInfo pdfMetaData = pdfDoc.getDocumentInfo();
            pdfMetaData.setAuthor("Samuel Huylebroeck");
            pdfMetaData.addCreationDate();
            pdfMetaData.setCreator("iText Software");
            pdfMetaData.setKeywords("example, accessibility");
            pdfMetaData.setSubject("PDF accessibility");
            // Title is derived from html

            // pdf conversion
            FontProvider fontProvider = new FontProvider();
            fontProvider.addStandardPdfFonts();
            // The noto-nashk font file (.ttf extension) is placed in the resources
            fontProvider.addDirectory(SRC);

            ConverterProperties props = new ConverterProperties();
            props.setFontProvider(fontProvider);
            // Base URI is required to resolve the path to source files
            props.setBaseUri(SRC);

            // Setup custom tagworker factory for better tagging of headers
            DefaultTagWorkerFactory tagWorkerFactory = new AccessibilityTagWorkerFactory();
            props.setTagWorkerFactory(tagWorkerFactory);

            HtmlConverter.convertToPdf(htmlStream, pdfDoc, props);
        }
    }
}

C#

C#
using System.IO;
using iText.Html2pdf;
using iText.Html2pdf.Attach.Impl;
using iText.Kernel.Pdf;
using iText.Layout.Font;
using iText.Samples.Sandbox.Pdfhtml.Headertagging;

namespace iText.Samples.Sandbox.Pdfhtml
{
    /// <summary>
    /// Sample that converts an HTML file into a tagged PDF and sets the document language,
    /// XMP metadata, viewer preferences and document info entries along the way.
    /// </summary>
    public class CreateAccessiblePDF
    {
        /// <summary>Resource directory: holds the HTML source and is also used as font directory and base URI.</summary>
        public static readonly string SRC = "../../../resources/pdfhtml/AccessiblePDF/";
        /// <summary>Output path that <see cref="Main"/> passes to <see cref="ManipulatePdf"/>.</summary>
        public static readonly string DEST = "results/sandbox/pdfhtml/Accessibility.pdf";

        /// <summary>Creates the output directory and converts Accessibility.html from SRC into DEST.</summary>
        /// <param name="args">Unused.</param>
        public static void Main(string[] args)
        {
            FileInfo file = new FileInfo(DEST);
            file.Directory.Create();
            string htmlSource = SRC + "Accessibility.html";

            new CreateAccessiblePDF().ManipulatePdf(htmlSource, DEST);
        }

        /// <summary>Converts the HTML file at <paramref name="src"/> into a tagged PDF written to <paramref name="dest"/>.</summary>
        /// <param name="src">Path of the HTML file to convert.</param>
        /// <param name="dest">Path of the PDF file to create.</param>
        public void ManipulatePdf(string src, string dest)
        {
            WriterProperties writerProperties = new WriterProperties();
            writerProperties.AddXmpMetadata();

            PdfWriter pdfWriter = new PdfWriter(dest, writerProperties);
            PdfDocument pdfDoc = new PdfDocument(pdfWriter);
            pdfDoc.GetCatalog().SetLang(new PdfString("en-US"));

            pdfDoc.SetTagged();
            pdfDoc.GetCatalog().SetViewerPreferences(new PdfViewerPreferences().SetDisplayDocTitle(true));

            PdfDocumentInfo pdfMetaData = pdfDoc.GetDocumentInfo();
            pdfMetaData.SetAuthor("Samuel Huylebroeck");
            pdfMetaData.AddCreationDate();
            pdfMetaData.SetCreator("iText Software");
            pdfMetaData.SetKeywords("example, accessibility");
            pdfMetaData.SetSubject("PDF accessibility");

            // Title is derived from html

            // pdf conversion
            ConverterProperties props = new ConverterProperties();
            FontProvider fontProvider = new FontProvider();
            fontProvider.AddStandardPdfFonts();
            // The noto-nashk font file (.ttf extension) is placed in the resources
            fontProvider.AddDirectory(SRC);

            props.SetFontProvider(fontProvider);
            // Base URI is required to resolve the path to source files
            props.SetBaseUri(SRC);

            // Setup custom tagworker factory for better tagging of headers
            DefaultTagWorkerFactory tagWorkerFactory = new AccessibilityTagWorkerFactory();
            props.SetTagWorkerFactory(tagWorkerFactory);

            // Open the HTML read-only (FileMode.Open alone requests read/write access)
            // and dispose the stream even if the conversion throws.
            using (FileStream htmlStream = new FileStream(src, FileMode.Open, FileAccess.Read))
            {
                HtmlConverter.ConvertToPdf(htmlStream, pdfDoc, props);
            }

            pdfDoc.Close();
        }
    }
}


accessibilitytagworkerfactory

JAVA

JAVA
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2023 Apryse Group NV
    Authors: Apryse Software.

    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.sandbox.pdfhtml.headertagging;

import com.itextpdf.html2pdf.attach.ITagWorker;
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.styledxmlparser.node.IElementNode;

/**
 * Tag worker factory that hands out custom workers for the heading tags {@code h1} to
 * {@code h6} and for {@code th}.
 */
public class AccessibilityTagWorkerFactory extends DefaultTagWorkerFactory {

    /**
     * Picks a custom tag worker based on the tag name.
     *
     * @param tag     the element node being processed
     * @param context the processor context forwarded to the created worker
     * @return a {@code CustomThTagWorker} for {@code th}, a {@code CustomHTagWorker} carrying
     *         the heading level for {@code h1}..{@code h6}, and {@code null} for any other tag
     */
    @Override
    public ITagWorker getCustomTagWorker(IElementNode tag, ProcessorContext context) {
        final String tagName = tag.name();

        if (tagName.equals("th")) {
            return new CustomThTagWorker(tag, context);
        }

        // Heading tags are exactly two characters: 'h' followed by a digit from 1 to 6.
        if (tagName.length() == 2 && tagName.charAt(0) == 'h') {
            final char levelChar = tagName.charAt(1);
            if (levelChar >= '1' && levelChar <= '6') {
                return new CustomHTagWorker(tag, context, levelChar - '0');
            }
        }

        return null;
    }
}

C#

C#
using iText.Html2pdf.Attach;
using iText.Html2pdf.Attach.Impl;
using iText.StyledXmlParser.Node;

namespace iText.Samples.Sandbox.Pdfhtml.Headertagging
{
    /// <summary>
    /// Tag worker factory that hands out custom workers for the heading tags h1 to h6 and for th.
    /// </summary>
    public class AccessibilityTagWorkerFactory : DefaultTagWorkerFactory
    {
        /// <summary>Picks a custom tag worker based on the tag name.</summary>
        /// <param name="tag">The element node being processed.</param>
        /// <param name="context">The processor context forwarded to the created worker.</param>
        /// <returns>
        /// A CustomThTagWorker for "th", a CustomHTagWorker carrying the heading level for
        /// "h1".."h6", and null for any other tag.
        /// </returns>
        public override ITagWorker GetCustomTagWorker(IElementNode tag, ProcessorContext context)
        {
            string tagName = tag.Name();

            if (tagName == "th")
            {
                return new CustomThTagWorker(tag, context);
            }

            // Heading tags are exactly two characters: 'h' followed by a digit from 1 to 6.
            if (tagName != null && tagName.Length == 2 && tagName[0] == 'h')
            {
                char levelChar = tagName[1];
                if (levelChar >= '1' && levelChar <= '6')
                {
                    return new CustomHTagWorker(tag, context, levelChar - '0');
                }
            }

            return null;
        }
    }
}


customhtagworker

JAVA

JAVA
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2023 Apryse Group NV
    Authors: Apryse Software.

    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.sandbox.pdfhtml.headertagging;

import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.tags.DivTagWorker;
import com.itextpdf.layout.IPropertyContainer;
import com.itextpdf.layout.element.Div;
import com.itextpdf.styledxmlparser.node.IElementNode;


/**
 * Div-based tag worker that assigns a heading role ({@code "H"} followed by the level)
 * to the element it produces.
 */
public class CustomHTagWorker extends DivTagWorker {
    /** Heading level appended to {@code "H"} to form the role name. */
    private final int headingLevel;

    /**
     * Creates a worker for a heading of the given level.
     *
     * @param element the element node being processed
     * @param context the processor context
     * @param i       the heading level used in the role name
     */
    public CustomHTagWorker(IElementNode element, ProcessorContext context, int i) {
        super(element, context);
        this.headingLevel = i;
    }

    /**
     * Returns the element built by the parent worker after setting its role to
     * {@code "H" + level}.
     *
     * @return the element with the heading role applied
     */
    @Override
    public IPropertyContainer getElementResult() {
        Div div = (Div) super.getElementResult();
        div.getAccessibilityProperties().setRole("H" + headingLevel);
        // Return the element that was just modified instead of asking the parent a second time.
        return div;
    }
}

C#

C#
using iText.Html2pdf.Attach;
using iText.Html2pdf.Attach.Impl.Tags;
using iText.Layout;
using iText.Layout.Element;
using iText.StyledXmlParser.Node;

namespace iText.Samples.Sandbox.Pdfhtml.Headertagging
{
    /// <summary>
    /// Div-based tag worker that assigns a heading role ("H" followed by the level)
    /// to the element it produces.
    /// </summary>
    public class CustomHTagWorker : DivTagWorker
    {
        /// <summary>Heading level appended to "H" to form the role name.</summary>
        private readonly int headingLevel;

        /// <summary>Creates a worker for a heading of the given level.</summary>
        /// <param name="element">The element node being processed.</param>
        /// <param name="context">The processor context.</param>
        /// <param name="i">The heading level used in the role name.</param>
        public CustomHTagWorker(IElementNode element, ProcessorContext context, int i) : base(element, context)
        {
            this.headingLevel = i;
        }

        /// <summary>
        /// Returns the element built by the parent worker after setting its role to "H" + level.
        /// </summary>
        /// <returns>The element with the heading role applied.</returns>
        public override IPropertyContainer GetElementResult()
        {
            Div div = (Div) base.GetElementResult();
            div.GetAccessibilityProperties().SetRole("H" + headingLevel);
            // Return the element that was just modified instead of asking the parent a second time.
            return div;
        }
    }
}
JavaScript errors detected

Please note, these errors can depend on your browser setup.

If this problem persists, please contact our support.