当前位置: 技术问答>java相关
pdf 格式转换成 txt 的java源程序?
来源: 互联网 发布时间:2015-05-17
本文导语: 哪位高手收藏有pdf2txt的java源程序? (加50分) 或者是编程思路? (加50分) | 下载包: http://www.etymon.com/pj/ import java.io.*; import java.util.*; import com.etymon.pj.*; import com.etymon.pj.object....
哪位高手收藏有pdf2txt的java源程序? (加50分)
或者是编程思路? (加50分)
或者是编程思路? (加50分)
|
下载包:
http://www.etymon.com/pj/
import java.io.*;
import java.util.*;
import com.etymon.pj.*;
import com.etymon.pj.object.*;
import com.etymon.pj.exception.*;
/**
* This is a wrapper for the Pj PDF parser
*/
public class PjWrapper {
Pdf pdf;
PjCatalog catalog;
PjPagesNode rootPage;
/**
 * Builds a wrapper around the PDF named by {@code PdfFileName}: constructs the
 * {@code Pdf} object, then caches its catalog and the root node of its page tree.
 *
 * @param PdfFileName file name passed straight to the {@code Pdf} constructor
 * @throws IOException declared by this constructor; presumably raised when the file cannot be read
 * @throws PjException declared by this constructor; presumably raised by the Pj library calls
 */
public PjWrapper(String PdfFileName) throws IOException, PjException {
    this.pdf = new Pdf(PdfFileName);

    // The catalog is looked up directly with getObject (not resolve); this
    // relies on the catalog entry never being a reference itself.
    this.catalog = (PjCatalog) this.pdf.getObject(this.pdf.getCatalog());

    // The catalog only holds a reference to the root of the pages tree,
    // so that reference is resolved to obtain the actual node.
    this.rootPage = (PjPagesNode) this.pdf.resolve(this.catalog.getPages());
}
public static void main(String[] args) throws IOException, PjException {
PjWrapper testWrapper = new PjWrapper(args[0]);
LinkedList textList = testWrapper.getAllText();
Object[] o = textList.toArray();
for (int i = 0;i
http://www.etymon.com/pj/
import java.io.*;
import java.util.*;
import com.etymon.pj.*;
import com.etymon.pj.object.*;
import com.etymon.pj.exception.*;
/**
* This is a wrapper for the Pj PDF parser
*/
public class PjWrapper {
Pdf pdf;
PjCatalog catalog;
PjPagesNode rootPage;
/**
 * Builds a wrapper around the PDF named by {@code PdfFileName}: constructs the
 * {@code Pdf} object, then caches its catalog and the root node of its page tree.
 *
 * @param PdfFileName file name passed straight to the {@code Pdf} constructor
 * @throws IOException declared by this constructor; presumably raised when the file cannot be read
 * @throws PjException declared by this constructor; presumably raised by the Pj library calls
 */
public PjWrapper(String PdfFileName) throws IOException, PjException {
    this.pdf = new Pdf(PdfFileName);

    // The catalog is looked up directly with getObject (not resolve); this
    // relies on the catalog entry never being a reference itself.
    this.catalog = (PjCatalog) this.pdf.getObject(this.pdf.getCatalog());

    // The catalog only holds a reference to the root of the pages tree,
    // so that reference is resolved to obtain the actual node.
    this.rootPage = (PjPagesNode) this.pdf.resolve(this.catalog.getPages());
}
public static void main(String[] args) throws IOException, PjException {
PjWrapper testWrapper = new PjWrapper(args[0]);
LinkedList textList = testWrapper.getAllText();
Object[] o = textList.toArray();
for (int i = 0;i