当前位置：首页 > article >正文

获取pdf文件文字图片内容以及在PDF中插入图片

article 2025/2/28 15:55:26

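该功能用到如下jar包：Apache PDFBox 3.x（提供 Loader、PDDocument、PDFTextStripper、PDImageXObject，用于读取PDF中的文字和图片）以及 Spire.PDF for Java（提供 PdfDocument、PdfPageBase、PdfImage，用于向PDF中插入图片）。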

//获取图片数据
	/**
	 * Extracts the image XObjects of a hard-coded PDF and writes each one as a PNG file.
	 *
	 * <p>The input is {@code C:\Users\xiaomage\Desktop\123.pdf}; every image is written to
	 * {@code D:\image\image_<page>页<n>.png}, where {@code <page>} is the 1-based page number
	 * and {@code <n>} is a counter shared across all pages. The page count, each page index
	 * and each XObject key are printed to stdout.
	 *
	 * @throws IOException if the PDF cannot be loaded, an image cannot be decoded, or a PNG
	 *                     cannot be written
	 */
	public void huoqu() throws IOException {
		File file = new File("C:\\Users\\xiaomage\\Desktop\\123.pdf");
		// try-with-resources so the document is released even when an image fails to decode.
		try (PDDocument document = Loader.loadPDF(file)) {
			int allPages = document.getNumberOfPages();
			System.out.println(allPages);
			int count = 0;
			for (int i = 0; i < allPages; i++) {
				System.out.println(i);
				PDPage page = document.getPage(i);
				PDResources resources = page.getResources();
				if (resources == null) {
					// A page without a resource dictionary has nothing to extract.
					continue;
				}
				Iterable<COSName> xObjectNames = resources.getXObjectNames();
				if (xObjectNames == null) {
					continue;
				}
				for (COSName key : xObjectNames) {
					System.out.println("key:" + key);
					// XObjects can also be forms; casting those to PDImageXObject would
					// throw ClassCastException, so only images are processed.
					if (!resources.isImageXObject(key)) {
						continue;
					}
					PDImageXObject image = (PDImageXObject) resources.getXObject(key);
					BufferedImage bImage = image.getImage();
					ImageIO.write(bImage, "PNG", new File("D:\\image\\" + "image_" + (i + 1) + "页" + count + ".png"));
					count++;
				}
			}
		}
	}




/**
 * Program entry point: resolves the three working folders relative to the current
 * directory and hands them to {@code service}.
 *
 * @param args command-line arguments (not read)
 * @throws Exception propagated from path resolution or from {@code service}
 */
public static void main(String[] args) throws Exception {
	// Output folder: only its canonical path is passed on, and it is echoed to stderr.
	final String resultPath = new File("resultFile//").getCanonicalPath();
	System.err.println(resultPath);
	logger.info("123");

	// Folder holding the images to insert.
	final File imgFile = new File("imgFile//");
	// Folder holding the PDFs to process.
	final File sourceFile = new File("sourceFile//");

	service(sourceFile, imgFile, resultPath);
}
	
	
	/**
	 * Processes every entry of the source folder: looks up the matching image via
	 * {@code huoquText}, stamps it into the PDF via {@code xiugai}, and appends one
	 * timestamped line per entry to {@code error.txt} in the current working directory.
	 *
	 * <p>Despite its name, {@code error.txt} receives both success and failure lines and is
	 * opened in append mode.
	 *
	 * @param sourceFile folder containing the PDFs to process
	 * @param imgFile    folder containing the candidate images
	 * @param resultPath folder path the stamped PDFs are written to, under the same file name
	 * @throws Exception if a path cannot be resolved, the log cannot be written, or
	 *                   processing of any file fails (the remaining files are not processed)
	 */
	public static void service(File sourceFile,File imgFile,String resultPath) throws Exception{
		// new File("") resolves to the current working directory.
		String errorFileName = new File("").getCanonicalPath() + "//error.txt";
		String imgPath = imgFile.getCanonicalPath();
		String sourcePath = sourceFile.getCanonicalPath();
		List<String> sourcefileNameList = getAllFiles(sourceFile, new ArrayList<String>());

		// One formatter for the whole run; it is confined to this method call.
		SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

		// try-with-resources: the log is flushed and closed even when a file throws.
		try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(errorFileName, true))) {
			if (sourcefileNameList == null || sourcefileNameList.isEmpty()) {
				logger.info("原始文件夹没有文件");
				bufferedWriter.append(timestampFormat.format(new Date())+"：sourceFile没有文件"+" \n");
				return;
			}

			for (String sourcefileName : sourcefileNameList) {
				String sourceFilePath = sourcePath+"//"+sourcefileName;
				Map<String, String> mapResult = huoquText(sourceFilePath, imgFile);
				// Constant-first equals so a missing "isTrue" key counts as failure, not an NPE.
				if ("true".equals(mapResult.get("isTrue"))) {
					String imgNamePath = imgPath+"//"+mapResult.get("imgName");
					// Insert the stamp image and save under the same name in the result folder.
					xiugai(sourceFilePath, resultPath+"//"+sourcefileName, imgNamePath);
					bufferedWriter.append(timestampFormat.format(new Date())+"：文件名称："+sourcefileName+" 结果：success"+" \n");
					logger.info(sourcefileName+"："+mapResult.get("msg"));
				} else {
					logger.info(sourcefileName+"："+mapResult.get("msg"));
					bufferedWriter.append(timestampFormat.format(new Date())+"：文件名称"+sourcefileName+" 结果："+mapResult.get("msg")+" \n");
				}
			}
		}
	}
//获取文件夹里的文件
	/**
	 * Appends the names of the regular files directly inside {@code folder} to
	 * {@code fileNameList}.
	 *
	 * <p>Subdirectories are skipped: callers treat every returned name as a file to open,
	 * so a directory name would only fail later. The listing is not recursive, and the
	 * order is whatever {@link File#listFiles()} returns.
	 *
	 * @param folder       directory to list; {@code null}, a missing path, or a non-directory
	 *                     adds nothing
	 * @param fileNameList list to append to; it is modified in place
	 * @return the same {@code fileNameList} instance
	 */
	public static List<String> getAllFiles(File folder, List<String> fileNameList) {
		if (folder == null) {
			return fileNameList;
		}
		// listFiles() returns null when the path is not a directory or cannot be read.
		File[] files = folder.listFiles();
		if (files != null) {
			for (File file : files) {
				if (file.isFile()) {
					fileNameList.add(file.getName());
				}
			}
		}
		return fileNameList;
	}
	
	
	
	
	//获取文字数据
		/**
		 * Reads the text of a PDF, takes the line immediately preceding the first line that
		 * starts with {@code ¥}, and looks for an image whose base name equals that line.
		 *
		 * <p>The returned map always contains {@code "isTrue"} ({@code "true"} or
		 * {@code "fail"}) and {@code "msg"}; on success it also contains {@code "imgName"},
		 * the matching image file name including its extension. An image's base name is the
		 * part before its first {@code '.'}; a name without a dot is compared as a whole.
		 *
		 * @param fileNamePath path of the PDF to read
		 * @param imgFileS     folder containing the candidate images
		 * @return the result map described above
		 * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
		 */
		public static  Map<String, String> huoquText(String fileNamePath,File imgFileS ) throws IOException {
			Map<String, String> map = new HashMap<String, String>();
			List<String> fileNameList = getAllFiles(imgFileS, new ArrayList<String>());
			if (fileNameList == null || fileNameList.isEmpty()) {
				map.put("isTrue", "fail");
				map.put("msg", "图库无数据图片");
				return map;
			}

			System.err.println(fileNamePath);
			String text;
			// try-with-resources so the document is released once the text has been read.
			try (PDDocument document = Loader.loadPDF(new File(fileNamePath))) {
				text = new PDFTextStripper().getText(document);
			}

			// Line breaks become "===" markers so the line before the "¥" line can be cut out.
			// NOTE(review): only "\r\n" is handled; presumably the extracted text uses Windows
			// line endings on the target machine - confirm before running elsewhere.
			String text1 = text.replace("\r\n", "===");
			String text2 = text1.split("===¥")[0];
			// With no marker left, the whole remainder is the key; the former unconditional
			// "+3" offset dropped its first two characters or threw on very short text.
			int lastMarker = text2.lastIndexOf("===");
			String text3 = lastMarker < 0 ? text2 : text2.substring(lastMarker + 3);
			System.err.println("text3:" + text3);

			for (String imgName : fileNameList) {
				// A file name without an extension used to make substring(0, -1) throw.
				int dot = imgName.indexOf(".");
				String fileName = dot < 0 ? imgName : imgName.substring(0, dot);
				System.err.println("fileName:" + fileName);
				System.err.println("text fileName:" + fileName.equals(text3));
				if (fileName.equals(text3)) {
					map.put("isTrue", "true");
					map.put("imgName", imgName);
					map.put("msg", "success");
					return map;
				}
			}

			map.put("isTrue", "fail");
			map.put("msg", "图库无该图片：文件路径" + fileNamePath);
			return map;
		}
	
		/**
		 * Draws an image near the bottom-right corner of the first page of a PDF and saves
		 * the result to a new file.
		 *
		 * <p>The image is scaled to 37% of its own width and height and placed 25 units from
		 * the right edge and 20 units from the bottom edge of the page's media box. The
		 * source path, page size and computed coordinates are printed to stderr.
		 *
		 * @param basepath  path of the PDF to load
		 * @param reultpath path the modified PDF is saved to
		 * @param imgpath   path of the image to draw
		 * @throws IOException declared by the original signature
		 */
		public static void xiugai(String basepath,String reultpath,String imgpath) throws IOException {
			System.err.println(basepath);
			// Load the existing PDF document.
			PdfDocument pdf = new PdfDocument();
			try {
				pdf.loadFromFile(basepath);

				/*
				 * Original author's note: output produced with Spire.PDF carries an
				 * "Evaluation Warning : The document was created with Spire.PDF for .NET."
				 * banner that only shows at the top of the first page; the described
				 * workaround is to add a page and delete the first page.
				 *
				 * NOTE(review): the code below removes the page it has just added, not the
				 * first page - confirm this is the intended form of the workaround.
				 */
				PdfPageBase pb = pdf.getPages().add();
				pdf.getPages().remove(pb);

				// The image always goes on the first page.
				PdfPageBase page = pdf.getPages().get(0);

				Rectangle2D pageSize = page.getMediaBox();
				double pageWidth = pageSize.getWidth();
				double pageHeight = pageSize.getHeight();
				System.err.println(pageWidth);
				System.err.println(pageHeight);

				PdfImage image = PdfImage.fromFile(imgpath);
				// Size of the image area on the page: 37% of the image's own dimensions.
				float width = image.getWidth() * 0.37f;
				float height = image.getHeight() * 0.37f;

				// Top-left corner of the image area, offset from the right and bottom edges.
				double x = pageWidth - width - 25;
				double y = pageHeight - height - 20;
				System.err.println(x);
				System.err.println(y);

				page.getCanvas().drawImage(image, x, y, width, height);

				pdf.saveToFile(reultpath);
			} finally {
				// Release the document even when loading, drawing or saving fails.
				pdf.close();
			}
		}

查看全文

http://www.kler.cn/a/429558.html