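获取 PDF 文件中的文字和图片内容,以及在 PDF 中插入图片
该功能用到如下 jar 包:Apache PDFBox 3.x(Loader、PDDocument、PDFTextStripper)和 Spire.PDF for Java(PdfDocument、PdfImage)。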
//获取图片数据
/**
 * Extracts every image XObject referenced by the page resources of a fixed PDF
 * and writes each one to disk as a PNG.
 *
 * <p>The input file ({@code C:\Users\xiaomage\Desktop\123.pdf}) and the output
 * directory ({@code D:\image\}) are hard-coded. Output files are named
 * {@code image_<page>页<n>.png}, where {@code <page>} is the 1-based page number
 * and {@code <n>} is a counter that keeps increasing across pages.
 *
 * @throws IOException if the PDF cannot be loaded, an image cannot be decoded,
 *                     or a PNG cannot be written
 */
public void huoqu() throws IOException {
    File file = new File("C:\\Users\\xiaomage\\Desktop\\123.pdf");
    // try-with-resources: the document holds the underlying file open until it is closed.
    try (PDDocument document = Loader.loadPDF(file)) {
        int allPages = document.getNumberOfPages();
        System.out.println(allPages);
        int count = 0;
        for (int i = 0; i < allPages; i++) {
            System.out.println(i);
            PDPage page = document.getPage(i);
            PDResources resources = page.getResources();
            if (resources == null) {
                // A page without a resource dictionary has nothing to extract.
                continue;
            }
            Iterable<COSName> xObjectNames = resources.getXObjectNames();
            if (xObjectNames == null) {
                continue;
            }
            for (COSName key : xObjectNames) {
                System.out.println("key:" + key);
                // XObjects can also be forms; casting those to PDImageXObject would
                // throw ClassCastException, so only image XObjects are processed.
                if (!resources.isImageXObject(key)) {
                    continue;
                }
                PDImageXObject image = (PDImageXObject) resources.getXObject(key);
                BufferedImage bImage = image.getImage();
                ImageIO.write(bImage, "PNG", new File("D:\\image\\" + "image_" + (i + 1) + "页" + count + ".png"));
                count++;
            }
        }
    }
}
/**
 * Program entry point: resolves the three working directories relative to the
 * current working directory and hands them to {@code service}.
 *
 * <ul>
 *   <li>{@code resultFile} - where the processed PDFs are written</li>
 *   <li>{@code imgFile} - the folder holding the images to insert</li>
 *   <li>{@code sourceFile} - the folder holding the PDFs to process</li>
 * </ul>
 *
 * @param args unused
 * @throws Exception if a path cannot be canonicalized or processing fails
 */
public static void main(String[] args) throws Exception {
    // Canonical path of the output folder, echoed to stderr for diagnostics.
    final String outputDirPath = new File("resultFile//").getCanonicalPath();
    System.err.println(outputDirPath);
    logger.info("123");

    // Image folder and source PDF folder.
    final File stampDir = new File("imgFile//");
    final File inputDir = new File("sourceFile//");

    service(inputDir, stampDir, outputDirPath);
}
//逻辑处理
/**
 * Processes every entry of {@code sourceFile}: looks up the matching image via
 * {@code huoquText}, inserts it into the PDF via {@code xiugai}, and appends one
 * timestamped line per entry to {@code error.txt} in the current working directory.
 *
 * <p>Despite its name, {@code error.txt} receives a line for successes as well as
 * failures. The file is opened in append mode.
 *
 * @param sourceFile folder containing the PDFs to process
 * @param imgFile    folder containing the candidate images
 * @param resultPath folder path the processed PDFs are saved under
 * @throws Exception if a path cannot be resolved, the log cannot be written,
 *                   or processing of a PDF fails
 */
public static void service(File sourceFile, File imgFile, String resultPath) throws Exception {
    // Result log lives in the current working directory.
    String errorFileName = new File("").getCanonicalPath() + "//error.txt";
    String imgPath = imgFile.getCanonicalPath();
    String sourcePath = sourceFile.getCanonicalPath();
    List<String> sourcefileNameList = getAllFiles(sourceFile, new ArrayList<String>());

    // One formatter for the whole run instead of a new instance per log line.
    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    // try-with-resources: flushes and closes the log even when a PDF fails midway,
    // so lines already written for earlier files are not lost and the handle is not leaked.
    try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(errorFileName, true))) {
        if (sourcefileNameList == null || sourcefileNameList.isEmpty()) {
            logger.info("原始文件夹没有文件");
            bufferedWriter.append(timestampFormat.format(new Date()) + ":sourceFile没有文件" + " \n");
            return;
        }
        for (String sourcefileName : sourcefileNameList) {
            String sourceFilePath = sourcePath + "//" + sourcefileName;
            Map<String, String> mapResult = huoquText(sourceFilePath, imgFile);
            // Constant-first comparison stays safe if the key is ever missing.
            if ("true".equals(mapResult.get("isTrue"))) {
                String imgNamePath = imgPath + "//" + mapResult.get("imgName");
                // Insert the image into the PDF and save it under the result folder.
                xiugai(sourceFilePath, resultPath + "//" + sourcefileName, imgNamePath);
                bufferedWriter.append(timestampFormat.format(new Date()) + ":文件名称:" + sourcefileName + " 结果:success" + " \n");
                logger.info(sourcefileName + ":" + mapResult.get("msg"));
            } else {
                logger.info(sourcefileName + ":" + mapResult.get("msg"));
                bufferedWriter.append(timestampFormat.format(new Date()) + ":文件名称" + sourcefileName + " 结果:" + mapResult.get("msg") + " \n");
            }
        }
    }
}
//获取文件夹里的文件
/**
 * Appends the names of the regular files located directly inside {@code folder}
 * to {@code fileNameList} and returns that same list.
 *
 * <p>Sub-directories are skipped and the listing is not recursive. If
 * {@code folder} is not a directory or cannot be listed, the list is returned
 * unchanged.
 *
 * @param folder       directory to list
 * @param fileNameList list the file names (without path) are appended to
 * @return {@code fileNameList}, for call chaining
 */
public static List<String> getAllFiles(File folder, List<String> fileNameList) {
    // listFiles() yields null when the path is not a directory or an I/O error occurs.
    File[] files = folder.listFiles();
    if (files == null) {
        return fileNameList;
    }
    for (File file : files) {
        // Only regular files: callers open each returned name as a PDF or an image,
        // so a sub-directory name would make them fail.
        if (file.isFile()) {
            fileNameList.add(file.getName());
        }
    }
    return fileNameList;
}
//获取文字数据
/**
 * Reads the text of a PDF, extracts a lookup key from it, and searches the
 * image folder for a file whose base name equals that key.
 *
 * <p>The key is the line of extracted text immediately preceding the first line
 * that starts with {@code ¥}. An image's base name is its file name up to
 * (not including) the first {@code '.'}.
 *
 * <p>The returned map always contains {@code "isTrue"} ({@code "true"} or
 * {@code "fail"}) and {@code "msg"}; on success it also contains
 * {@code "imgName"}, the matching image file name.
 *
 * @param fileNamePath path of the PDF to read
 * @param imgFileS     folder containing the candidate images
 * @return the result map described above
 * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
 */
public static Map<String, String> huoquText(String fileNamePath, File imgFileS) throws IOException {
    Map<String, String> map = new HashMap<String, String>();
    List<String> fileNameList = getAllFiles(imgFileS, new ArrayList<String>());
    if (fileNameList == null || fileNameList.isEmpty()) {
        map.put("isTrue", "fail");
        map.put("msg", "图库无数据图片");
        return map;
    }

    System.err.println(fileNamePath);
    String text;
    // try-with-resources: release the PDF's file handle as soon as the text is extracted.
    try (PDDocument document = Loader.loadPDF(new File(fileNamePath))) {
        text = new PDFTextStripper().getText(document);
    }
    String text3 = extractStampKey(text);
    System.err.println("text3:" + text3);

    for (String imgName : fileNameList) {
        // Names without a dot are compared as-is instead of failing in substring(0, -1).
        int dot = imgName.indexOf(".");
        String fileName = dot >= 0 ? imgName.substring(0, dot) : imgName;
        System.err.println("fileName:" + fileName);
        System.err.println("text fileName:" + fileName.equals(text3));
        if (fileName.equals(text3)) {
            map.put("isTrue", "true");
            map.put("imgName", imgName);
            map.put("msg", "success");
            return map;
        }
    }

    map.put("isTrue", "fail");
    map.put("msg", "图库无该图片:文件路径" + fileNamePath);
    return map;
}

/**
 * Returns the line that immediately precedes the first line starting with '¥',
 * or the last line of {@code text} when no such line exists.
 */
private static String extractStampKey(String text) {
    // Normalize line endings so the lookup does not depend on which separator the text uses.
    String normalized = text.replace("\r\n", "\n").replace('\r', '\n');
    int marker = normalized.indexOf("\n¥");
    String head = marker >= 0 ? normalized.substring(0, marker) : normalized;
    // lastIndexOf returns -1 when head is a single line, so +1 starts at index 0.
    return head.substring(head.lastIndexOf('\n') + 1);
}
//插入图片
/**
 * Draws an image onto the first page of a PDF and saves the result as a new file.
 *
 * <p>The image is drawn at 37% of its own width and height. Its top-left corner is
 * placed at {@code (pageWidth - width - 25, pageHeight - height - 20)}, computed from
 * the page's media box. Presumably this is the bottom-right corner of the page;
 * confirm against the coordinate origin of the Spire.PDF canvas.
 *
 * @param basepath  path of the PDF to load
 * @param reultpath path the modified PDF is saved to
 * @param imgpath   path of the image to draw
 * @throws IOException declared for callers; kept from the original signature
 */
public static void xiugai(String basepath, String reultpath, String imgpath) throws IOException {
    System.err.println(basepath);
    PdfDocument pdf = new PdfDocument();
    try {
        // Load the existing PDF document.
        pdf.loadFromFile(basepath);

        // Workaround kept from the original author for the Spire.PDF
        // "Evaluation Warning" banner: add a page, then remove it again.
        // NOTE(review): the original note describes deleting the FIRST page, but the
        // code removes the page it just added - confirm this has the intended effect.
        PdfPageBase pb = pdf.getPages().add();
        pdf.getPages().remove(pb);

        // Only the first page receives the image.
        PdfPageBase page = pdf.getPages().get(0);
        Rectangle2D pageSize = page.getMediaBox();
        double pageWidth = pageSize.getWidth();
        double pageHeight = pageSize.getHeight();
        System.err.println(pageWidth);
        System.err.println(pageHeight);

        PdfImage image = PdfImage.fromFile(imgpath);
        // Size of the drawn image: 37% of the source image dimensions.
        float width = image.getWidth() * 0.37f;
        float height = image.getHeight() * 0.37f;
        // Drawing origin, offset 25 / 20 units from the far page edges.
        double x = pageWidth - width - 25;
        double y = pageHeight - height - 20;
        System.err.println(x);
        System.err.println(y);

        page.getCanvas().drawImage(image, x, y, width, height);
        pdf.saveToFile(reultpath);
    } finally {
        // Release the document even when loading, drawing or saving fails;
        // this method is called once per file in a batch loop.
        pdf.close();
    }
}