package com;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
//import java.io.FileInputStream;
//import java.io.FileNotFoundException;
//import java.io.IOException;
//import java.util.HashMap;
//import java.util.Iterator;
//import java.util.Map;
//
//import org.apache.poi.hwpf.HWPFDocument;
//import org.apache.poi.hwpf.model.FieldsDocumentPart;
//import org.apache.poi.hwpf.usermodel.Field;
//import org.apache.poi.hwpf.usermodel.Fields;
//import org.apache.poi.hwpf.usermodel.Range;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;

/**
 * Command-line demo that extracts the plain text of a PDF file with PDFBox
 * and prints it to standard output.
 *
 * <p>A commented-out variant inside {@code main} shows the equivalent for a
 * Word document using POI's {@code ExtractorFactory}. The PDFBox and POI
 * libraries imported by this file must be on the classpath.
 */
public class Test {

    /** PDF that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "D://知识积累//EL.pdf";

    /**
     * Prints the text of a PDF between a "begin" and an "end" marker line.
     *
     * <p>If the file cannot be opened or parsed, the failure is reported once
     * on standard error and nothing is printed to standard output.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             PDF to read, otherwise {@code DEFAULT_PDF_PATH} is used
     */
    public static void main(String[] args) {
        /*
        try { // Word format
            String path = "D:\\workspace\\MyPlatFileNew\\web\\content\\kent\\a6\\uploadattach\\iplat4j01361351007003_20130220170327.doc";
            System.out.println("========" + path);
            File inputFile = new File(path);
            POITextExtractor extractor = ExtractorFactory.createExtractor(inputFile);
            System.out.println("Document Text: ");
            System.out.println("====================");
            System.out.println(extractor.getText());
            System.out.println("====================");
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        */

        // PDF format
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        try {
            String text = extractPdfText(path);
            System.out.println("----------begin------------");
            System.out.println(text);
            System.out.println("-----------end-----------");
        } catch (IOException e) {
            // Stop at the first failure. The previous version caught each
            // step separately and kept going with null references, which
            // ended in a NullPointerException that hid the real cause.
            System.err.println("Could not extract text from " + path);
            e.printStackTrace();
        }
    }

    /**
     * Reads the PDF at {@code path} and returns its text as produced by
     * {@code PDFTextStripper}.
     *
     * @param path location of the PDF file
     * @return the extracted text
     * @throws IOException if the file cannot be opened, parsed or read
     */
    private static String extractPdfText(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            PDFParser parser = new PDFParser(in);
            parser.parse();
            try {
                return new PDFTextStripper().getText(parser.getPDDocument());
            } finally {
                // The parsed document was never closed before. Close it here
                // even when text extraction fails.
                // NOTE(review): relies on PDFParser.getDocument() returning the
                // document that getPDDocument() wraps - confirm against the
                // PDFBox version on the classpath.
                parser.getDocument().close();
            }
        } finally {
            // Always release the file handle, including on unchecked exceptions.
            in.close();
        }
    }
}
// Required JARs: pdfbox-1.7.1.jar, poi-3.9-20121203.jar, poi-ooxml-3.9-20121203.jar.