pdfHTML: Accessible PDF Creation
A simple example showing how to create a tagged PDF with pdfHTML and how to configure the metadata necessary to obtain an accessible PDF.
createaccessiblepdf
JAVA
JAVA
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Authors: Apryse Software.
For more information, please contact iText Software at this address:
sales@itextpdf.com
*/
package com.itextpdf.samples.sandbox.pdfhtml;
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.kernel.pdf.WriterProperties;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfViewerPreferences;
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.samples.sandbox.pdfhtml.headertagging.AccessibilityTagWorkerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Sample that converts an HTML file into a tagged PDF and fills in the
 * document-level settings (language, viewer preferences, document info,
 * XMP metadata) configured below.
 */
public class CreateAccessiblePDF {
    /** Directory holding the HTML source; also used as base URI and font directory. */
    public static final String SRC = "./src/main/resources/pdfhtml/AccessiblePDF/";
    /** Path of the PDF produced by {@link #main(String[])}. */
    public static final String DEST = "./target/sandbox/pdfhtml/Accessibility.pdf";

    /**
     * Creates the output directory if needed and converts {@code Accessibility.html}
     * from {@link #SRC} into {@link #DEST}.
     *
     * @param args ignored
     * @throws IOException if the source cannot be read or the destination cannot be written
     */
    public static void main(String[] args) throws IOException {
        File file = new File(DEST);
        file.getParentFile().mkdirs();
        String htmlSource = SRC + "Accessibility.html";
        new CreateAccessiblePDF().manipulatePdf(htmlSource, DEST);
    }

    /**
     * Converts the HTML file at {@code src} into a tagged PDF at {@code dest}.
     *
     * @param src  path of the HTML file to convert
     * @param dest path of the PDF file to write
     * @throws IOException if the source cannot be opened or the destination cannot be written
     */
    public void manipulatePdf(String src, String dest) throws IOException {
        WriterProperties writerProperties = new WriterProperties();
        writerProperties.addXmpMetadata();

        // The source is opened first so that a missing HTML file does not leave a
        // truncated destination file behind. try-with-resources closes every stream
        // and the document even when the conversion throws; on the success path this
        // replaces the explicit pdfDoc.close() call that used to follow the conversion.
        try (FileInputStream htmlStream = new FileInputStream(src);
             FileOutputStream outputStream = new FileOutputStream(dest);
             PdfDocument pdfDoc = new PdfDocument(new PdfWriter(outputStream, writerProperties))) {
            describeDocument(pdfDoc);
            HtmlConverter.convertToPdf(htmlStream, pdfDoc, createConverterProperties());
        }
    }

    /** Sets the language, tagging, viewer preferences and document info on {@code pdfDoc}. */
    private static void describeDocument(PdfDocument pdfDoc) {
        pdfDoc.getCatalog().setLang(new PdfString("en-US"));
        pdfDoc.setTagged();
        pdfDoc.getCatalog().setViewerPreferences(new PdfViewerPreferences().setDisplayDocTitle(true));

        PdfDocumentInfo pdfMetaData = pdfDoc.getDocumentInfo();
        pdfMetaData.setAuthor("Samuel Huylebroeck");
        pdfMetaData.addCreationDate();
        pdfMetaData.setCreator("iText Software");
        pdfMetaData.setKeywords("example, accessibility");
        pdfMetaData.setSubject("PDF accessibility");
        // Title is derived from html
    }

    /** Builds the converter settings: fonts, base URI and the custom tag worker factory. */
    private static ConverterProperties createConverterProperties() {
        FontProvider fontProvider = new FontProvider();
        fontProvider.addStandardPdfFonts();
        // The noto-nashk font file (.ttf extension) is placed in the resources
        fontProvider.addDirectory(SRC);

        ConverterProperties props = new ConverterProperties();
        props.setFontProvider(fontProvider);
        // Base URI is required to resolve the path to source files
        props.setBaseUri(SRC);
        // Setup custom tagworker factory for better tagging of headers
        DefaultTagWorkerFactory tagWorkerFactory = new AccessibilityTagWorkerFactory();
        props.setTagWorkerFactory(tagWorkerFactory);
        return props;
    }
}
C#
C#
using System.IO;
using iText.Html2pdf;
using iText.Html2pdf.Attach.Impl;
using iText.Kernel.Pdf;
using iText.Layout.Font;
using iText.Samples.Sandbox.Pdfhtml.Headertagging;
namespace iText.Samples.Sandbox.Pdfhtml
{
    /// <summary>
    /// Sample that converts an HTML file into a tagged PDF and fills in the
    /// document-level settings (language, viewer preferences, document info,
    /// XMP metadata) configured below.
    /// </summary>
    public class CreateAccessiblePDF
    {
        /// <summary>Directory holding the HTML source; also used as base URI and font directory.</summary>
        public static readonly string SRC = "../../../resources/pdfhtml/AccessiblePDF/";
        /// <summary>Path of the PDF produced by <see cref="Main"/>.</summary>
        public static readonly string DEST = "results/sandbox/pdfhtml/Accessibility.pdf";

        /// <summary>
        /// Creates the output directory if needed and converts Accessibility.html
        /// from <see cref="SRC"/> into <see cref="DEST"/>.
        /// </summary>
        /// <param name="args">Ignored.</param>
        public static void Main(string[] args)
        {
            FileInfo file = new FileInfo(DEST);
            file.Directory.Create();
            string htmlSource = SRC + "Accessibility.html";
            new CreateAccessiblePDF().ManipulatePdf(htmlSource, DEST);
        }

        /// <summary>Converts the HTML file at <paramref name="src"/> into a tagged PDF at <paramref name="dest"/>.</summary>
        /// <param name="src">Path of the HTML file to convert.</param>
        /// <param name="dest">Path of the PDF file to write.</param>
        public void ManipulatePdf(string src, string dest)
        {
            WriterProperties writerProperties = new WriterProperties();
            writerProperties.AddXmpMetadata();

            // The source is opened first, and read-only: the two-argument FileStream
            // constructor requests read/write access, which fails on read-only files.
            // The using blocks dispose the stream and the document even when the
            // conversion throws.
            using (FileStream htmlStream = new FileStream(src, FileMode.Open, FileAccess.Read))
            using (PdfDocument pdfDoc = new PdfDocument(new PdfWriter(dest, writerProperties)))
            {
                pdfDoc.GetCatalog().SetLang(new PdfString("en-US"));
                pdfDoc.SetTagged();
                pdfDoc.GetCatalog().SetViewerPreferences(new PdfViewerPreferences().SetDisplayDocTitle(true));

                PdfDocumentInfo pdfMetaData = pdfDoc.GetDocumentInfo();
                pdfMetaData.SetAuthor("Samuel Huylebroeck");
                pdfMetaData.AddCreationDate();
                pdfMetaData.SetCreator("iText Software");
                pdfMetaData.SetKeywords("example, accessibility");
                pdfMetaData.SetSubject("PDF accessibility");
                // Title is derived from html

                // pdf conversion
                ConverterProperties props = new ConverterProperties();
                FontProvider fontProvider = new FontProvider();
                fontProvider.AddStandardPdfFonts();
                // The noto-nashk font file (.ttf extension) is placed in the resources
                fontProvider.AddDirectory(SRC);
                props.SetFontProvider(fontProvider);
                // Base URI is required to resolve the path to source files
                props.SetBaseUri(SRC);
                // Setup custom tagworker factory for better tagging of headers
                DefaultTagWorkerFactory tagWorkerFactory = new AccessibilityTagWorkerFactory();
                props.SetTagWorkerFactory(tagWorkerFactory);

                HtmlConverter.ConvertToPdf(htmlStream, pdfDoc, props);
            }
        }
    }
}
accessibilitytagworkerfactory
JAVA
JAVA
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Authors: Apryse Software.
For more information, please contact iText Software at this address:
sales@itextpdf.com
*/
package com.itextpdf.samples.sandbox.pdfhtml.headertagging;
import com.itextpdf.html2pdf.attach.ITagWorker;
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.styledxmlparser.node.IElementNode;
/**
 * Tag worker factory that supplies custom workers for the heading tags
 * {@code h1} to {@code h6} and for {@code th}; every other tag yields
 * {@code null}.
 */
public class AccessibilityTagWorkerFactory extends DefaultTagWorkerFactory {

    /**
     * Picks the custom worker for {@code tag}, if there is one.
     *
     * @param tag     element node whose name selects the worker
     * @param context processor context passed through to the worker
     * @return a {@code CustomHTagWorker} for {@code h1}..{@code h6}, a
     *         {@code CustomThTagWorker} for {@code th}, otherwise {@code null}
     */
    @Override
    public ITagWorker getCustomTagWorker(IElementNode tag, ProcessorContext context) {
        final String tagName = tag.name();
        if (tagName.equals("th")) {
            return new CustomThTagWorker(tag, context);
        }
        final int level = headingLevel(tagName);
        if (level > 0) {
            return new CustomHTagWorker(tag, context, level);
        }
        return null;
    }

    /** Returns 1..6 when {@code tagName} is exactly "h1".."h6", otherwise 0. */
    private static int headingLevel(String tagName) {
        if (tagName.length() != 2 || tagName.charAt(0) != 'h') {
            return 0;
        }
        final int digit = tagName.charAt(1) - '0';
        return (digit >= 1 && digit <= 6) ? digit : 0;
    }
}
C#
C#
using iText.Html2pdf.Attach;
using iText.Html2pdf.Attach.Impl;
using iText.StyledXmlParser.Node;
namespace iText.Samples.Sandbox.Pdfhtml.Headertagging
{
    /// <summary>
    /// Tag worker factory that supplies custom workers for the heading tags
    /// h1 to h6 and for th; every other tag yields null.
    /// </summary>
    public class AccessibilityTagWorkerFactory : DefaultTagWorkerFactory
    {
        /// <summary>Picks the custom worker for <paramref name="tag"/>, if there is one.</summary>
        /// <param name="tag">Element node whose name selects the worker.</param>
        /// <param name="context">Processor context passed through to the worker.</param>
        /// <returns>
        /// A CustomHTagWorker for h1..h6, a CustomThTagWorker for th, otherwise null.
        /// </returns>
        public override ITagWorker GetCustomTagWorker(IElementNode tag, ProcessorContext context)
        {
            string tagName = tag.Name();
            if (tagName == "th")
            {
                return new CustomThTagWorker(tag, context);
            }
            int level = HeadingLevel(tagName);
            if (level > 0)
            {
                return new CustomHTagWorker(tag, context, level);
            }
            return null;
        }

        /// <summary>Returns 1..6 when the name is exactly "h1".."h6", otherwise 0.</summary>
        private static int HeadingLevel(string tagName)
        {
            // A null name falls through to "no custom worker", as the switch default did.
            if (tagName == null || tagName.Length != 2 || tagName[0] != 'h')
            {
                return 0;
            }
            int digit = tagName[1] - '0';
            return (digit >= 1 && digit <= 6) ? digit : 0;
        }
    }
}
customhtagworker
JAVA
JAVA
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Authors: Apryse Software.
For more information, please contact iText Software at this address:
sales@itextpdf.com
*/
package com.itextpdf.samples.sandbox.pdfhtml.headertagging;
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.tags.DivTagWorker;
import com.itextpdf.layout.IPropertyContainer;
import com.itextpdf.layout.element.Div;
import com.itextpdf.styledxmlparser.node.IElementNode;
/**
 * Div tag worker that assigns the role {@code "H" + level} to the element it
 * produces, where {@code level} is the number given at construction.
 */
public class CustomHTagWorker extends DivTagWorker {
    /** Number appended to "H" to form the role; never changes after construction. */
    private final int level;

    /**
     * @param element element node handed to the parent worker
     * @param context processor context handed to the parent worker
     * @param i       number appended to "H" to form the role
     */
    public CustomHTagWorker(IElementNode element, ProcessorContext context, int i) {
        super(element, context);
        this.level = i;
    }

    /**
     * Sets the role on the parent worker's result and returns that same object.
     *
     * @return the element produced by the parent worker, with its role set
     */
    @Override
    public IPropertyContainer getElementResult() {
        Div div = (Div) super.getElementResult();
        div.getAccessibilityProperties().setRole("H" + level);
        // Return the instance already fetched instead of asking the parent a second time.
        return div;
    }
}
C#
C#
using iText.Html2pdf.Attach;
using iText.Html2pdf.Attach.Impl.Tags;
using iText.Layout;
using iText.Layout.Element;
using iText.StyledXmlParser.Node;
namespace iText.Samples.Sandbox.Pdfhtml.Headertagging
{
    /// <summary>
    /// Div tag worker that assigns the role "H" + level to the element it
    /// produces, where level is the number given at construction.
    /// </summary>
    public class CustomHTagWorker : DivTagWorker
    {
        /// <summary>Number appended to "H" to form the role; never changes after construction.</summary>
        private readonly int level;

        /// <param name="element">Element node handed to the base worker.</param>
        /// <param name="context">Processor context handed to the base worker.</param>
        /// <param name="i">Number appended to "H" to form the role.</param>
        public CustomHTagWorker(IElementNode element, ProcessorContext context, int i) : base(element, context)
        {
            this.level = i;
        }

        /// <summary>Sets the role on the base worker's result and returns that same object.</summary>
        /// <returns>The element produced by the base worker, with its role set.</returns>
        public override IPropertyContainer GetElementResult()
        {
            Div div = (Div) base.GetElementResult();
            div.GetAccessibilityProperties().SetRole("H" + level);
            // Return the instance already fetched instead of asking the base a second time.
            return div;
        }
    }
}