Skip to main content
Skip table of contents

pdfHTML: Custom ResourceRetriever

In release 7.1.13 of iText Core and 3.0.2 of pdfHTML we've implemented a new class called ResourceRetriever.
This class allows you to hook into and customize any resource fetching actions executed by pdfHTML, which makes several things possible that we previously weren't able to do.

First of all, it allows us to set a size limit on the resources that are retrieved. It is possible for malicious resources to crash your application by providing an endless data stream which eats up all of the available memory. Now that a size limit can be set on the ResourceRetriever, we are able to avoid this.


JAVA

JAVA
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.styledxmlparser.resolver.resource.DefaultResourceRetriever;
import com.itextpdf.styledxmlparser.resolver.resource.IResourceRetriever;
import java.io.File;
import java.io.IOException;

/**
 * Sample that converts an HTML file to PDF while filtering out every resource
 * whose size exceeds a fixed byte limit.
 */
public class FilterSizeByDefaultResourceRetriever {
    public static final String SRC = "./src/main/resources/pdfhtml/FilterSizeByDefaultResourceRetriever/";
    public static final String DEST = "./target/sandbox/pdfhtml/FilterSizeByDefaultResourceRetriever.pdf";

    /**
     * Creates the output directory and runs the conversion for the sample HTML file.
     *
     * @param args command line arguments (not used)
     * @throws IOException propagated from the conversion
     */
    public static void main(String[] args) throws IOException {
        // The directory that will hold the resulting PDF has to exist first.
        new File(DEST).getParentFile().mkdirs();

        FilterSizeByDefaultResourceRetriever sample = new FilterSizeByDefaultResourceRetriever();
        sample.manipulatePdf(SRC + "FilterSizeByDefaultResourceRetriever.html", DEST);
    }

    /**
     * Converts the HTML file at {@code src} into a PDF file at {@code dest}
     * using a size-limited resource retriever.
     *
     * @param src  path of the HTML file to convert
     * @param dest path of the PDF file to create
     * @throws IOException propagated from the conversion
     */
    protected void manipulatePdf(String src, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();

        // Resources exceeding 100kb are filtered out, i.e. no data is extracted from them.
        properties.setResourceRetriever(new DefaultResourceRetriever().setResourceSizeByteLimit(100_000));

        File htmlFile = new File(src);
        File pdfFile = new File(dest);
        HtmlConverter.convertToPdf(htmlFile, pdfFile, properties);
    }
}


C#

C#
using System;
using System.IO;
using iText.Html2pdf;
using iText.StyledXmlParser.Resolver.Resource;

namespace iText.Samples.Sandbox.Pdfhtml.Resource.Retriever
{
    /// <summary>
    /// Sample that converts an HTML file to PDF while filtering out every resource
    /// whose size exceeds a fixed byte limit.
    /// </summary>
    public class FilterSizeByDefaultResourceRetriever
    {
        public static readonly string SRC = "../../../resources/pdfhtml/FilterSizeByDefaultResourceRetriever/";
        public static readonly string DEST = "results/sandbox/pdfhtml/FilterSizeByDefaultResourceRetriever.pdf";

        /// <summary>Creates the output directory and converts the sample HTML file.</summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // The directory that will hold the resulting PDF has to exist first.
            new FileInfo(DEST).Directory.Create();

            var sample = new FilterSizeByDefaultResourceRetriever();
            sample.ManipulatePdf(SRC + "FilterSizeByDefaultResourceRetriever.html", DEST);
        }

        /// <summary>
        /// Converts the HTML file at <paramref name="src"/> into a PDF file at
        /// <paramref name="dest"/> using a size-limited resource retriever.
        /// </summary>
        /// <param name="src">Path of the HTML file to convert.</param>
        /// <param name="dest">Path of the PDF file to create.</param>
        protected void ManipulatePdf(String src, String dest)
        {
            ConverterProperties properties = new ConverterProperties();

            // Resources exceeding 100kb are filtered out, i.e. no data is extracted from them.
            properties.SetResourceRetriever(new DefaultResourceRetriever().SetResourceSizeByteLimit(100_000));

            FileInfo htmlFile = new FileInfo(src);
            FileInfo pdfFile = new FileInfo(dest);
            HtmlConverter.ConvertToPdf(htmlFile, pdfFile, properties);
        }
    }
}


You can also extend the DefaultResourceRetriever class with your own implementation. This way, you are able to filter the resource requests in a more advanced manner. In the example below we override the URL filter so that only URLs containing the "/imagePath" text in their path are allowed.

JAVA

JAVA
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.styledxmlparser.resolver.resource.DefaultResourceRetriever;
import com.itextpdf.styledxmlparser.resolver.resource.IResourceRetriever;
import java.io.File;
import java.io.IOException;
import java.net.URL;

/**
 * Sample that converts an HTML file to PDF while only allowing resources whose
 * URL path contains a given fragment.
 */
public class FilterUrlByCustomResourceRetriever {
    public static final String SRC = "./src/main/resources/pdfhtml/FilterUrlByCustomResourceRetriever/";
    public static final String DEST = "./target/sandbox/pdfhtml/FilterUrlByCustomResourceRetriever.pdf";

    /**
     * Creates the output directory and runs the conversion for the sample HTML file.
     *
     * @param args command line arguments (not used)
     * @throws IOException propagated from the conversion
     */
    public static void main(String[] args) throws IOException {
        // The directory that will hold the resulting PDF has to exist first.
        new File(DEST).getParentFile().mkdirs();

        FilterUrlByCustomResourceRetriever sample = new FilterUrlByCustomResourceRetriever();
        sample.manipulatePdf(SRC + "FilterUrlByCustomResourceRetriever.html", DEST);
    }

    /**
     * Converts the HTML file at {@code src} into a PDF file at {@code dest}
     * using the URL-filtering resource retriever.
     *
     * @param src  path of the HTML file to convert
     * @param dest path of the PDF file to create
     * @throws IOException propagated from the conversion
     */
    protected void manipulatePdf(String src, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        properties.setResourceRetriever(new FilterResourceRetriever());

        File htmlFile = new File(src);
        File pdfFile = new File(dest);
        HtmlConverter.convertToPdf(htmlFile, pdfFile, properties);
    }

    /**
     * Resource retriever that accepts a URL only when its path contains '/imagePath'.
     */
    private static final class FilterResourceRetriever extends DefaultResourceRetriever {
        @Override
        protected boolean urlFilter(URL url) {
            // Only URLs whose path contains the '/imagePath' text are allowed to be handled.
            String path = url.getPath();
            return path.contains("/imagePath");
        }
    }
}


C#

C#
using System;
using System.IO;
using iText.Html2pdf;
using iText.StyledXmlParser.Resolver.Resource;

namespace iText.Samples.Sandbox.Pdfhtml.Resource.Retriever
{
    /// <summary>
    /// Sample that converts an HTML file to PDF while only allowing resources whose
    /// URL path contains a given fragment.
    /// </summary>
    public class FilterUrlByCustomResourceRetriever
    {
        public static readonly string SRC = "../../../resources/pdfhtml/FilterUrlByCustomResourceRetriever/";
        public static readonly string DEST = "results/sandbox/pdfhtml/FilterUrlByCustomResourceRetriever.pdf";

        /// <summary>Creates the output directory and converts the sample HTML file.</summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // The directory that will hold the resulting PDF has to exist first.
            new FileInfo(DEST).Directory.Create();

            var sample = new FilterUrlByCustomResourceRetriever();
            sample.ManipulatePdf(SRC + "FilterUrlByCustomResourceRetriever.html", DEST);
        }

        /// <summary>
        /// Converts the HTML file at <paramref name="src"/> into a PDF file at
        /// <paramref name="dest"/> using the URL-filtering resource retriever.
        /// </summary>
        /// <param name="src">Path of the HTML file to convert.</param>
        /// <param name="dest">Path of the PDF file to create.</param>
        protected void ManipulatePdf(String src, String dest)
        {
            ConverterProperties properties = new ConverterProperties();
            properties.SetResourceRetriever(new FilterResourceRetriever());

            FileInfo htmlFile = new FileInfo(src);
            FileInfo pdfFile = new FileInfo(dest);
            HtmlConverter.ConvertToPdf(htmlFile, pdfFile, properties);
        }

        /// <summary>
        /// Resource retriever that accepts a URL only when its path contains '/imagePath'.
        /// </summary>
        private class FilterResourceRetriever : DefaultResourceRetriever
        {
            // Only URLs whose path contains the '/imagePath' text are allowed to be handled.
            protected override bool UrlFilter(Uri url) => url.AbsolutePath.Contains("/imagePath");
        }
    }
}



It is now also possible to replace a specifically named resource with another one, programmatically and at runtime, as shown in our final example:

JAVA

JAVA
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.styledxmlparser.resolver.resource.IResourceRetriever;
import com.itextpdf.styledxmlparser.resolver.resource.UriResolver;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
 * Sample that converts an HTML file to PDF while swapping one specifically named
 * image for a default image at retrieval time.
 */
public class AddDefaultImagesCustomRetriever {
    public static final String SRC = "./src/main/resources/pdfhtml/";
    public static final String DEST = "./target/sandbox/pdfhtml/AddDefaultImagesCustomRetriever.pdf";

    /**
     * Creates the output directory and runs the conversion for the sample HTML file.
     *
     * @param args command line arguments (not used)
     * @throws IOException propagated from the conversion
     */
    public static void main(String[] args) throws IOException {
        // The directory that will hold the resulting PDF has to exist first.
        new File(DEST).getParentFile().mkdirs();

        AddDefaultImagesCustomRetriever sample = new AddDefaultImagesCustomRetriever();
        sample.manipulatePdf(SRC + "AddDefaultImagesCustomRetriever/AddDefaultImagesCustomRetriever.html", DEST);
    }

    /**
     * Converts the HTML file at {@code src} into a PDF file at {@code dest}
     * using the image-replacing resource retriever.
     *
     * @param src  path of the HTML file to convert
     * @param dest path of the PDF file to create
     * @throws IOException propagated from the conversion
     */
    protected void manipulatePdf(String src, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        properties.setResourceRetriever(new CustomResourceRetriever(SRC));

        File htmlFile = new File(src);
        File pdfFile = new File(dest);
        HtmlConverter.convertToPdf(htmlFile, pdfFile, properties);
    }

    /**
     * Resource retriever that redirects requests for 'imageToReplace.png' to
     * 'images/defaultImage.png', resolved against the configured base URI.
     */
    private static final class CustomResourceRetriever implements IResourceRetriever {
        private String baseUri;

        /**
         * @param baseUri base URI against which the default image path is resolved
         */
        public CustomResourceRetriever(String baseUri) {
            this.baseUri = baseUri;
        }

        /**
         * Opens a stream for the given URL, or for the default image when the URL
         * refers to 'imageToReplace.png'.
         */
        @Override
        public InputStream getInputStreamByUrl(URL url) throws IOException {
            // The image with name 'imageToReplace.png' is replaced by the default image.
            boolean needsReplacement = url.toString().contains("imageToReplace.png");
            URL effectiveUrl = needsReplacement
                    ? new UriResolver(this.baseUri).resolveAgainstBaseUri("images/defaultImage.png")
                    : url;

            return effectiveUrl.openStream();
        }

        /**
         * Reads the whole resource behind the given URL into a byte array.
         *
         * @return the resource bytes, or {@code null} when no stream could be obtained
         */
        @Override
        public byte[] getByteArrayByUrl(URL url) throws IOException {
            try (InputStream resource = getInputStreamByUrl(url)) {
                return resource == null ? null : inputStreamToArray(resource);
            }
        }

        /**
         * Drains the stream into memory in 8192-byte chunks and returns the collected bytes.
         */
        private static byte[] inputStreamToArray(InputStream stream) throws java.io.IOException {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];

            // Stop as soon as a read yields no bytes (end of stream).
            int count;
            while ((count = stream.read(chunk)) >= 1) {
                collected.write(chunk, 0, count);
            }

            collected.close();
            return collected.toByteArray();
        }
    }
}

C#

C#
using System;
using System.IO;
using iText.Html2pdf;
using iText.StyledXmlParser.Resolver.Resource;

namespace iText.Samples.Sandbox.Pdfhtml.Resource.Retriever
{
    /// <summary>
    /// Sample that converts an HTML file to PDF while swapping one specifically named
    /// image for a default image at retrieval time.
    /// </summary>
    public class AddDefaultImagesCustomRetriever
    {
        public static readonly string SRC = "../../../resources/pdfhtml/";
        public static readonly string DEST = "results/sandbox/pdfhtml/AddDefaultImagesCustomRetriever.pdf";

        /// <summary>Creates the output directory and converts the sample HTML file.</summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            // The directory that will hold the resulting PDF has to exist first.
            FileInfo file = new FileInfo(DEST);
            file.Directory.Create();
            string htmlSource = SRC + "AddDefaultImagesCustomRetriever/AddDefaultImagesCustomRetriever.html";

            new AddDefaultImagesCustomRetriever().ManipulatePdf(htmlSource, DEST);
        }

        /// <summary>
        /// Converts the HTML file at <paramref name="src"/> into a PDF file at
        /// <paramref name="dest"/> using the image-replacing resource retriever.
        /// </summary>
        /// <param name="src">Path of the HTML file to convert.</param>
        /// <param name="dest">Path of the PDF file to create.</param>
        protected void ManipulatePdf(String src, String dest)
        {
            IResourceRetriever retriever = new CustomResourceRetriever(SRC);
            ConverterProperties converterProperties = new ConverterProperties();
            converterProperties.SetResourceRetriever(retriever);

            HtmlConverter.ConvertToPdf(new FileInfo(src), new FileInfo(dest), converterProperties);
        }

        /// <summary>
        /// Resource retriever that redirects requests for 'imageToReplace.png' to
        /// 'images/defaultImage.png', resolved against the configured base URI.
        /// </summary>
        private class CustomResourceRetriever : IResourceRetriever
        {
            // Assigned once in the constructor and never changed afterwards.
            private readonly String baseUri;

            /// <param name="baseUri">Base URI against which the default image path is resolved.</param>
            public CustomResourceRetriever(String baseUri)
            {
                this.baseUri = baseUri;
            }

            /// <summary>
            /// Opens a read-only file stream for the given URL, or for the default image
            /// when the URL refers to 'imageToReplace.png'.
            /// </summary>
            /// <remarks>
            /// The stream is opened from <c>url.LocalPath</c>, so this sample only reads local files.
            /// The caller is responsible for disposing the returned stream.
            /// </remarks>
            public Stream GetInputStreamByUrl(Uri url)
            {
                // The image with name 'imageToReplace.png' will be replaced by the default image.
                if (url.ToString().Contains("imageToReplace.png"))
                {
                    url = new UriResolver(this.baseUri).ResolveAgainstBaseUri("images/defaultImage.png");
                }

                return new FileStream(url.LocalPath, FileMode.Open, FileAccess.Read);
            }

            /// <summary>Reads the whole resource behind the given URL into a byte array.</summary>
            /// <returns>The resource bytes, or <c>null</c> when no stream could be obtained.</returns>
            public byte[] GetByteArrayByUrl(Uri url)
            {
                using (Stream stream = GetInputStreamByUrl(url))
                {
                    if (stream == null)
                    {
                        return null;
                    }

                    return InputStreamToArray(stream);
                }
            }

            /// <summary>Copies the remaining content of the stream into a new byte array.</summary>
            private static byte[] InputStreamToArray(Stream stream)
            {
                // The buffer is read before it is disposed, instead of calling ToArray()
                // on an already disposed MemoryStream as the previous version did.
                using (MemoryStream output = new MemoryStream())
                {
                    stream.CopyTo(output);
                    return output.ToArray();
                }
            }
        }
    }
}



JavaScript errors detected

Please note, these errors can depend on your browser setup.

If this problem persists, please contact our support.