How to check that all fonts that are used in a PDF file are embedded in the file with Java and iText? I have some existing PDF documents, and I'd like to validate that they use only embedded fonts.
This would require checking that no PDF standard fonts are used and that all other used fonts are embedded in the file.
Look at the ListUsedFonts example from iText in Action.
http://itextpdf.com/examples/iia.php?id=287
Looks like this will print out the fonts used in a pdf and if they are embedded.
/*
* This class is part of the book "iText in Action - 2nd Edition"
* written by Bruno Lowagie (ISBN: 9781935182610)
* For more info, go to: http://itextpdf.com/examples/
* This example only works with the AGPL version of iText.
*/
package part4.chapter16;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Set;
import java.util.TreeSet;
import part3.chapter11.FontTypes;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfReader;
public class ListUsedFonts {

    /** Path of the text file that receives the font listing written by {@link #main}. */
    public static String RESULT
        = "results/part4/chapter16/fonts.txt";

    /**
     * Creates a Set containing information about the fonts in the src PDF file.
     * Every page's resource dictionary is inspected; the reader is closed even
     * when inspection fails.
     *
     * @param src the path to a PDF file
     * @return a sorted set with one descriptive entry per distinct font
     * @throws IOException if the file cannot be opened or read
     */
    public Set<String> listFonts(String src) throws IOException {
        Set<String> set = new TreeSet<String>();
        PdfReader reader = new PdfReader(src);
        try {
            for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
                processResource(set, reader.getPageN(k).getAsDict(PdfName.RESOURCES));
            }
        } finally {
            // Release the file handle even if a malformed page makes processing throw.
            reader.close();
        }
        return set;
    }

    /**
     * Extracts the font names from page or XObject resources.
     * Does nothing when {@code resource} is {@code null}.
     *
     * @param set the set that receives the font descriptions
     * @param resource the resources dictionary to inspect, may be {@code null}
     */
    public static void processResource(Set<String> set, PdfDictionary resource) {
        processResource(set, resource,
                new java.util.IdentityHashMap<PdfDictionary, Boolean>());
    }

    /**
     * Recursive worker behind {@link #processResource(Set, PdfDictionary)}.
     *
     * @param set the set that receives the font descriptions
     * @param resource the resources dictionary to inspect, may be {@code null}
     * @param visited resource dictionaries already inspected (compared by identity),
     *        so that XObjects referring back to an enclosing resource dictionary
     *        cannot cause endless recursion
     */
    private static void processResource(Set<String> set, PdfDictionary resource,
            java.util.Map<PdfDictionary, Boolean> visited) {
        if (resource == null || visited.put(resource, Boolean.TRUE) != null) {
            return;
        }
        PdfDictionary xobjects = resource.getAsDict(PdfName.XOBJECT);
        if (xobjects != null) {
            for (PdfName key : xobjects.getKeys()) {
                // XObjects are stream objects, so they must be fetched with
                // getAsStream; the fonts they use are listed in the stream
                // dictionary's own /Resources entry, not in the stream dictionary.
                PdfDictionary xobject = xobjects.getAsStream(key);
                if (xobject != null) {
                    processResource(set, xobject.getAsDict(PdfName.RESOURCES), visited);
                }
            }
        }
        PdfDictionary fonts = resource.getAsDict(PdfName.FONT);
        if (fonts == null) {
            return;
        }
        for (PdfName key : fonts.getKeys()) {
            String description = describeFont(fonts.getAsDict(key));
            if (description != null) {
                set.add(description);
            }
        }
    }

    /**
     * Builds the listing entry for a single font dictionary.
     *
     * @param font the font dictionary, may be {@code null}
     * @return the description, or {@code null} when the entry is not a dictionary
     *         or has no /BaseFont name (as is the case for Type 3 fonts)
     */
    private static String describeFont(PdfDictionary font) {
        if (font == null) {
            return null;
        }
        PdfName baseFont = font.getAsName(PdfName.BASEFONT);
        if (baseFont == null) {
            return null;
        }
        String name = baseFont.toString();
        // A subset font is named "/ABCDEF+RealName": six-character tag, then '+'.
        if (name.length() > 8 && name.charAt(7) == '+') {
            return String.format("%s subset (%s)", name.substring(8), name.substring(1, 7));
        }
        // Drop the first character (the PDF name prefix).
        name = name.substring(1);
        PdfDictionary desc = font.getAsDict(PdfName.FONTDESCRIPTOR);
        if (desc == null) {
            name += " nofontdescriptor";
        } else if (desc.get(PdfName.FONTFILE) != null) {
            name += " (Type 1) embedded";
        } else if (desc.get(PdfName.FONTFILE2) != null) {
            name += " (TrueType) embedded";
        } else if (desc.get(PdfName.FONTFILE3) != null) {
            PdfName subtype = font.getAsName(PdfName.SUBTYPE);
            if (subtype == null) {
                name += " embedded";
            } else {
                name += " (" + subtype.toString().substring(1) + ") embedded";
            }
        }
        return name;
    }

    /**
     * Main method: creates the sample PDF, lists its fonts and writes the
     * listing to {@link #RESULT}.
     *
     * @param args no arguments needed
     * @throws DocumentException if the sample PDF cannot be created
     * @throws IOException if reading the PDF or writing the listing fails
     */
    public static void main(String[] args) throws IOException, DocumentException {
        new FontTypes().createPdf(FontTypes.RESULT);
        Set<String> set = new ListUsedFonts().listFonts(FontTypes.RESULT);
        PrintWriter out = new PrintWriter(new FileOutputStream(RESULT));
        try {
            for (String fontname : set) {
                out.println(fontname);
            }
            out.flush();
        } finally {
            out.close();
        }
    }
}
/**
 * Creates a set containing information about the not-embedded fonts within the src PDF file.
 * The resources of every page are handed to {@code processResource}; the reader
 * is closed even when that processing throws.
 *
 * @param src the path to a PDF file
 * @return a sorted set with the names of the fonts that are not embedded
 * @throws IOException if the file cannot be opened or read
 */
public Set<String> listFonts(String src) throws IOException {
    Set<String> set = new TreeSet<String>();
    PdfReader reader = new PdfReader(src);
    try {
        for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
            processResource(set, reader.getPageN(k).getAsDict(PdfName.RESOURCES));
        }
    } finally {
        // Always release the underlying file, also on malformed input.
        reader.close();
    }
    return set;
}
/**
 * Tells whether a font name has the shape of a subset font name, i.e. it is
 * longer than eight characters and has a '+' at index 7 (the position right
 * after a leading character and a six-character tag).
 *
 * @param name the font name as returned by {@code PdfName.toString()}, may be {@code null}
 * @return true if the name denotes an embedded subset font
 */
private boolean isEmbeddedSubset(String name) {
    if (name == null) {
        return false;
    }
    if (name.length() <= 8) {
        return false;
    }
    return '+' == name.charAt(7);
}
/**
 * Adds the name of the given font to the set when the font is not embedded.
 * <p>
 * A font counts as embedded when its name has the subset-tag shape or when its
 * font descriptor holds a FontFile, FontFile2 or FontFile3 entry. A font
 * without a descriptor is judged by its DescendantFonts, if it has any.
 * Entries that are not dictionaries, or that have no /BaseFont name (Type 3
 * fonts, for instance), are skipped instead of causing a NullPointerException.
 *
 * @param font the font dictionary to examine, may be {@code null}
 * @param set the set that collects the names of the not-embedded fonts
 */
private void processFont(PdfDictionary font, Set<String> set) {
    if (font == null) {
        return;
    }
    PdfName baseFont = font.getAsName(PdfName.BASEFONT);
    if (baseFont == null) {
        return;
    }
    String name = baseFont.toString();
    if (isEmbeddedSubset(name)) {
        return;
    }
    PdfDictionary desc = font.getAsDict(PdfName.FONTDESCRIPTOR);
    if (desc == null) {
        PdfArray descendant = font.getAsArray(PdfName.DESCENDANTFONTS);
        if (descendant == null) {
            // No descriptor and no descendants: nothing can carry a font file.
            set.add(name.substring(1));
        } else {
            // Composite font: the descriptors live on the descendant fonts.
            for (int i = 0; i < descendant.size(); i++) {
                processFont(descendant.getAsDict(i), set);
            }
        }
        return;
    }
    // FontFile = Type 1, FontFile2 = TrueType, FontFile3 = subtype-specific program.
    boolean embedded = desc.get(PdfName.FONTFILE) != null
            || desc.get(PdfName.FONTFILE2) != null
            || desc.get(PdfName.FONTFILE3) != null;
    if (!embedded) {
        set.add(name.substring(1));
    }
}
/**
 * Extracts the names of the not-embedded fonts from page or XObject resources.
 * Does nothing when {@code resource} is {@code null}.
 *
 * @param set the set with the font names
 * @param resource the resources dictionary, may be {@code null}
 */
public void processResource(Set<String> set, PdfDictionary resource) {
    processResource(set, resource,
            new java.util.IdentityHashMap<PdfDictionary, Boolean>());
}

/**
 * Recursive worker behind {@link #processResource(Set, PdfDictionary)}.
 *
 * @param set the set with the font names
 * @param resource the resources dictionary, may be {@code null}
 * @param visited resource dictionaries already inspected (compared by identity),
 *        so that XObjects referring back to an enclosing resource dictionary
 *        cannot cause endless recursion
 */
private void processResource(Set<String> set, PdfDictionary resource,
        java.util.Map<PdfDictionary, Boolean> visited) {
    if (resource == null || visited.put(resource, Boolean.TRUE) != null) {
        return;
    }
    PdfDictionary xobjects = resource.getAsDict(PdfName.XOBJECT);
    if (xobjects != null) {
        for (PdfName key : xobjects.getKeys()) {
            // XObjects are stream objects, so they must be fetched with
            // getAsStream; the fonts they use are listed in the stream
            // dictionary's own /Resources entry, not in the stream dictionary.
            PdfDictionary xobject = xobjects.getAsStream(key);
            if (xobject != null) {
                processResource(set, xobject.getAsDict(PdfName.RESOURCES), visited);
            }
        }
    }
    PdfDictionary fonts = resource.getAsDict(PdfName.FONT);
    if (fonts == null) {
        return;
    }
    for (PdfName key : fonts.getKeys()) {
        processFont(fonts.getAsDict(key), set);
    }
}
The code above could be used to retrieve the fonts that are not embedded in the given PDF file. I've improved the code from iText in Action so that it can handle Font's DescendantFont node, too.
When you create Chunk, you declare what font you use.
Create a BaseFont from the font you want to use and declare it as BaseFont.EMBEDDED.
Note that if you do not set the subset option to true, the whole font will be embedded.
Be aware that embedding font might violate authorship rights.
I don't think this is an "iText" use case. Use either PDFBox or jPod. These implement the PDF model and as such enable you to:
- open the document
- recurse from the document root down the object tree
- check if this is a font object
- check if the font file is available
A check that only embedded fonts are used is far more complex (that is, fonts that are not embedded but also not used are fine).
The simplest answer is to open the PDF file with Adobe Acrobat, then:
- click on File
- select Properties
- click on the Fonts tab
This will show you a list of all fonts in the document. Any font that is embedded will display "(Embedded)" next to the font name.
For example:
ACaslonPro-Bold (Embedded)
where ACaslonPro-Bold is derived from the file name that you embedded it with (e.g. FontFactory.register("/path/to/ACaslonPro-Bold.otf", ...)).
精彩评论