当前位置:首页 » 编程语言 » javapoiword

javapoiword

发布时间: 2022-12-08 15:03:34

java POI 如何操作word 格式

1、环境支持

1.1 添加poi支持:包下载地址http://www.apache.org/dyn/closer.cgi/poi/release/

1.2 POI对Excel文件的读取操作比较方便,POI还提供对Word的DOC格式文件的读取。但在它的发行版本中没有发布对Word支持的模块,需要另外下载一个POI的扩展的Jar包。下载地址为http://www.ibiblio.org/maven2/org/textmining/tm-extractors/0.4/ 下载extractors-0.4_zip这个文件

package com.ray.poi.util;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.textmining.text.extraction.WordExtractor;

/**
* 读写doc
* @author wangzonghao
*
*/
public class POIWordUtil {
/**
* 读入doc
* @param doc
* @return
* @throws Exception
*/
public static String readDoc(String doc) throws Exception {
// 创建输入流读取DOC文件
FileInputStream in = new FileInputStream(new File(doc));
WordExtractor extractor = null;
String text = null;
// 创建WordExtractor
extractor = new WordExtractor();
// 对DOC文件进行提取
text = extractor.extractText(in);
return text;
}
/**
* 写出doc
* @param path
* @param content
* @return
*/
public static boolean writeDoc(String path, String content) {
boolean w = false;
try {

// byte b[] = content.getBytes("ISO-8859-1");
byte b[] = content.getBytes();

ByteArrayInputStream s = new ByteArrayInputStream(b);

POIFSFileSystem fs = new POIFSFileSystem();
DirectoryEntry directory = fs.getRoot();

DocumentEntry de = directory.createDocument("WordDocument", s);

FileOutputStream ostream = new FileOutputStream(path);

fs.writeFilesystem(ostream);

s.close();
ostream.close();

} catch (IOException e) {
e.printStackTrace();
}
return w;
}

}
测试

package com.ray.poi.util;

import junit.framework.TestCase;

public class POIUtilTest extends TestCase {

public void testReadDoc() {
try{
String text = POIWordUtil.readDoc("E:/work_space/poi/com/ray/poi/util/demo.doc");
System.out.println(text);
}catch(Exception e){
e.printStackTrace();
}

}

public void testWriteDoc() {
String wr;
try {
wr = POIWordUtil.readDoc("E:/work_space/poi/com/ray/poi/util/demo.doc");

boolean b = POIWordUtil.writeDoc("c:\\demo.doc",wr);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

}

}

⑵ JAVA使用POI读写word 乱码



public static void main(String args[])
throws Exception
{
XWPFDocument doc = new XWPFDocument();
XWPFParagraph p1 = doc.createParagraph();
p1.setAlignment(ParagraphAlignment.CENTER);
p1.setBorderBottom(Borders.DOUBLE);
p1.setBorderTop(Borders.DOUBLE);
p1.setBorderRight(Borders.DOUBLE);
p1.setBorderLeft(Borders.DOUBLE);
p1.setBorderBetween(Borders.SINGLE);
p1.setVerticalAlignment(TextAlignment.TOP);
XWPFRun r1 = p1.createRun();
r1.setBold(true);
r1.setText("The quick brown fox");
r1.setBold(true);
r1.setFontFamily("Courier");
r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH);
r1.setTextPosition(100);
XWPFParagraph p2 = doc.createParagraph();
p2.setAlignment(ParagraphAlignment.RIGHT);
p2.setBorderBottom(Borders.DOUBLE);
p2.setBorderTop(Borders.DOUBLE);
p2.setBorderRight(Borders.DOUBLE);
p2.setBorderLeft(Borders.DOUBLE);
p2.setBorderBetween(Borders.SINGLE);
XWPFRun r2 = p2.createRun();
r2.setText("jumped over the lazy dog");
r2.setStrike(true);
r2.setFontSize(20);
XWPFRun r3 = p2.createRun();
r3.setText("and went away");
r3.setStrike(true);
r3.setFontSize(20);
r3.setSubscript(VerticalAlign.SUPERSCRIPT);
XWPFParagraph p3 = doc.createParagraph();
p3.setWordWrap(true);
p3.setPageBreak(true);
p3.setAlignment(ParagraphAlignment.BOTH);
p3.setSpacingLineRule(LineSpacingRule.EXACT);
p3.setIndentationFirstLine(600);
XWPFRun r4 = p3.createRun();
r4.setTextPosition(20);
r4.setText("To be, or not to be: that is the question: Whether 'tis nobler in the mind to suffer The slings and arrows of outrageous fortune, Or to take arms against a sea of troubles, And by opposing end them? To die: to sleep; ");
r4.addBreak(BreakType.PAGE);
r4.setText("No more; and by a sleep to say we end The heart-ache and the thousand natural shocks That flesh is heir to, 'tis a consummation Devoutly to be wish'd. To die, to sleep; To sleep: perchance to dream: ay, there's the rub; .......");
r4.setItalic(true);
XWPFRun r5 = p3.createRun();
r5.setTextPosition(-10);
r5.setText("For in that sleep of death what dreams may come");
r5.addCarriageReturn();
r5.setText("When we have shuffled off this mortal coil,Must give us pause: there's the respectThat makes calamity of so long life;");
r5.addBreak();
r5.setText("For who would bear the whips and scorns of time,The oppressor's wrong, the proud man's contumely,");
r5.addBreak(BreakClear.ALL);
r5.setText("The pangs of despised love, the law's delay,The insolence of office and the spurns.......");
FileOutputStream out = new FileOutputStream("simple.docx");
doc.write(out);
out.close();
}

⑶ java 中用poi读取word和用docx4j读取word

不知道你是具体读取Word里面的什么元素,下面以读取文字和图片为例吧,两个代码示例,你参考看看:

  1. 读取文本

import com.spire.doc.Document;

import java.io.FileWriter;

import java.io.IOException;

public class ExtractText {

public static void main(String[] args) throws IOException {

//加载Word文档
Document document = new Document();
document.loadFromFile("C:\Users\Administrator\Desktop\sample.docx");

//获取文档中的文本保存为String
String text=document.getText();

//将String写入Txt文件
writeStringToTxt(text,"ExtractedText.txt");
}

public static void writeStringToTxt(String content, String txtFileName) throws IOException {

FileWriter fWriter= new FileWriter(txtFileName,true);
try {
fWriter.write(content);
}catch(IOException ex){
ex.printStackTrace();
}finally{
try{
fWriter.flush();
fWriter.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}

}


2. 读取图片


import com.spire.doc.Document;

import com.spire.doc.documents.DocumentObjectType;

import com.spire.doc.fields.DocPicture;

import com.spire.doc.interfaces.ICompositeObject;

import com.spire.doc.interfaces.IDocumentObject;

import javax.imageio.ImageIO;

import java.awt.image.BufferedImage;

import java.io.File;

import java.io.IOException;

import java.util.ArrayList;

import java.util.LinkedList;

import java.util.List;

import java.util.Queue;

public class ExtractImages {

public static void main(String[] args) throws IOException {

//加载Word文档

Document document = new Document();

document.loadFromFile("C:\Users\Administrator\Desktop\sample.docx");

//创建Queue对象

Queue nodes = new LinkedList();

nodes.add(document);

//创建List对象

List images = new ArrayList();

//遍历文档中的子对象

while (nodes.size() > 0) {

ICompositeObject node = nodes.poll();

for (int i = 0; i < node.getChildObjects().getCount(); i++) {

IDocumentObject child = node.getChildObjects().get(i);

if (child instanceof ICompositeObject) {

nodes.add((ICompositeObject) child);

//获取图片并添加到List

if (child.getDocumentObjectType() == DocumentObjectType.Picture) {

DocPicture picture = (DocPicture) child;

images.add(picture.getImage());

}

}

}

}

//将图片保存为PNG格式文件

for (int i = 0; i < images.size(); i++) {

File file = new File(String.format("output/图片-%d.png", i));

ImageIO.write(images.get(i), "PNG", file);

}

}

}

注意这里使用的jar包是spire.doc.jar,需要在java程序中先导入jar文件。

⑷ Java 利用poi 可以直接读取word中的表格保持样式生成新的word么

1.读取word
2003及word
2007需要的jar包
读取
2003
版本(.doc)的word文件相对来说比较简单,只需要
poi-3.5-beta6-20090622.jar

poi-scratchpad-3.5-beta6-20090622.jar
两个
jar
包即可,

2007
版本(.docx)就麻烦多,我说的这个麻烦不是我们写代码的时候麻烦,是要导入的
jar
包比较的多,有如下
7
个之多:
1.
openxml4j-bin-beta.jar
2.
poi-3.5-beta6-20090622.jar
3.
poi-ooxml-3.5-beta6-20090622.jar
4
.dom4j-1.6.1.jar
5.
geronimo-stax-api_1.0_spec-1.0.jar
6.
ooxml-schemas-1.0.jar
7.
xmlbeans-2.3.0.jar
其中
4-7

poi-ooxml-3.5-beta6-20090622.jar
所依赖的
jar
包(在
poi-bin-3.5-beta6-20090622.tar.gz
中的
ooxml-lib
目录下可以找到)。
2.换行符号
硬换行:文件中换行,如果是键盘中使用了"enter"的换行。
软换行:文件中一行的字符数容量有限,当字符数量超过一定值时,会自动切到下行显示。
对程序来说,硬换行才是可以识别的、确定的换行,软换行与字体大小、缩进有关。
3.读取的注意事项
值得注意的是:
POI
在读取不会读取
word
文件中的图片信息;
还有就是对于
2007
版的
word(.docx),
如果
word
文件中有表格,所有表格中的数据都会在读取出来的字符串的最后。
4.读取word文本内容代码
1
import
java.io.File;
2
import
java.io.FileInputStream;
3
import
java.io.InputStream;
4
5
import
org.apache.poi.POIXMLDocument;
6
import
org.apache.poi.POIXMLTextExtractor;
7
import
org.apache.poi.hwpf.extractor.WordExtractor;
8
import
org.apache.poi.openxml4j.opc.OPCPackage;
9
import
org.apache.poi.xwpf.extractor.XWPFWordExtractor;
10
11
public
class
Test
{
12
public
static
void
main(String[]
args)
{
13
try
{
14
InputStream
is
=
new
FileInputStream(new
File("2003.doc"));
15
WordExtractor
ex
=
new
WordExtractor(is);
16
String
text2003
=
ex.getText();
17
System.out.println(text2003);
18
19
OPCPackage
opcPackage
=
POIXMLDocument.openPackage("2007.docx");
20
POIXMLTextExtractor
extractor
=
new
XWPFWordExtractor(opcPackage);
21
String
text2007
=
extractor.getText();
22
System.out.println(text2007);
23
24
}
catch
(Exception
e)
{
25
e.printStackTrace();
26
}
27
}
28
}

⑸ 如何使用JAVA,POI读写word文档

public class HwpfTest {

@SuppressWarnings("deprecation")
@Test
public void testReadByExtractor() throws Exception {
InputStream is = new FileInputStream("D:\\test.doc");
WordExtractor extractor = new WordExtractor(is);
//输出word文档所有的文本
System.out.println(extractor.getText());
System.out.println(extractor.getTextFromPieces());
//输出页眉的内容
System.out.println("页眉:" + extractor.getHeaderText());
//输出页脚的内容
System.out.println("页脚:" + extractor.getFooterText());
//输出当前word文档的元数据信息,包括作者、文档的修改时间等。
System.out.println(extractor.getMetadataTextExtractor().getText());
//获取各个段落的文本
String paraTexts[] = extractor.getParagraphText();
for (int i=0; i<paraTexts.length; i++) {
System.out.println("Paragraph " + (i+1) + " : " + paraTexts[i]);
}
//输出当前word的一些信息
printInfo(extractor.getSummaryInformation());
//输出当前word的一些信息
this.printInfo(extractor.getDocSummaryInformation());
this.closeStream(is);
}

/**
* 输出SummaryInfomation
* @param info
*/
private void printInfo(SummaryInformation info) {
//作者
System.out.println(info.getAuthor());
//字符统计
System.out.println(info.getCharCount());
//页数
System.out.println(info.getPageCount());
//标题
System.out.println(info.getTitle());
//主题
System.out.println(info.getSubject());
}

/**
* 输出DocumentSummaryInfomation
* @param info
*/
private void printInfo(DocumentSummaryInformation info) {
//分类
System.out.println(info.getCategory());
//公司
System.out.println(info.getCompany());
}

/**
* 关闭输入流
* @param is
*/
private void closeStream(InputStream is) {
if (is != null) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}

}

⑹ Java poi操作word, 插入文字会多出一个换行

目测应该是多添加了段落,试着获取原有单元格中的段落然后添加文本

⑺ 怎么使用JAVA,POI读写word文档

如何使用JAVA、POI读写word文档??
能不能将一个word的内容完全读过来,放到一个新生成的word文件中去,要求能将word中的表格、图片等保留,格式不变。最好能给个例子?网上多是很早以前的那个解决方法如下:,只能读文本内容,且新生成的word文件打开时总是要提示选择编码,不太好用,希望能有新的解决方案??!!

poi操作word
1.1 添加poi支持:包下载地址

1.2 POI对Excel文件的读取操作比较方便,POI还提供对Word的DOC格式文件的读取。但在它的发行版本中没有发布对Word支持的模块,需要另外下载一个POI的扩展的Jar包。下载地址为;下载extractors-0.4_zip这个文件

2、提取Doc文件内容

public static String readDoc(String doc) throws Exception {
// 创建输入流读取DOC文件
FileInputStream in = new FileInputStream(new File(doc));
WordExtractor extractor = null;
String text = null;
// 创建WordExtractor
extractor = new WordExtractor();
// 对DOC文件进行提取
text = extractor.extractText(in);
return text;
}

public static void main(String[] args) {
try{
String text = WordReader.readDoc("c:/test.doc");
System.out.println(text);
}catch(Exception e){
e.printStackTrace();
}
}

3、写入Doc文档

import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public class WordWriter {
public static boolean writeDoc(String path, String content) {
boolean w = false;
try {

// byte b[] = content.getBytes("ISO-8859-1");
byte b[] = content.getBytes();

ByteArrayInputStream s = new ByteArrayInputStream(b);

POIFSFileSystem fs = new POIFSFileSystem();
DirectoryEntry directory = fs.getRoot();

DocumentEntry de = directory.createDocument("WordDocument", s);

FileOutputStream ostream = new FileOutputStream(path);

fs.writeFilesystem(ostream);

s.close();
ostream.close();

} catch (IOException e) {
e.printStackTrace();
}
return w;
}
public static void main(String[] args) throws Exception{
String wr=WordReader.readDoc("D:\\test.doc");
boolean b = writeDoc("D:\\result.doc",wr);

⑻ java poi 怎么控制输出word每行的内容

你好,试试以下代码行不行。
package com.sample;

import java.awt.Color;
import java.io.FileOutputStream;
import java.io.IOException;

import com.lowagie.text.Cell;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Element;
import com.lowagie.text.Font;
import com.lowagie.text.FontFactory;
import com.lowagie.text.Image;
import com.lowagie.text.PageSize;
import com.lowagie.text.Paragraph;
import com.lowagie.text.Phrase;
import com.lowagie.text.Table;
import com.lowagie.text.pdf.BaseFont;
import com.lowagie.text.rtf.RtfWriter2;

/**
*
* @author wangyanjun
* @email [email protected]
* @createDate Jun 12, 2008
*/
public class CreateWordDemo {

public void createDocContext(String file) throws DocumentException,
IOException {
// 设置纸张大小
Document document = new Document(PageSize.A4);
// 建立一个书写器(Writer)与document对象关联,通过书写器(Writer)可以将文档写入到磁盘中
RtfWriter2.getInstance(document, new FileOutputStream(file));
document.open();
// 设置中文字体
BaseFont bfChinese = BaseFont.createFont("STSongStd-Light",
"UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
// 标题字体风格
Font titleFont = new Font(bfChinese, 12, Font.BOLD);
// 正文字体风格
Font contextFont = new Font(bfChinese, 10, Font.NORMAL);
Paragraph title = new Paragraph("标题");
// 设置标题格式对齐方式
title.setAlignment(Element.ALIGN_CENTER);
title.setFont(titleFont);
document.add(title);

String contextString = "iText是一个能够快速产生PDF文件的java类库。"
+ " \n"// 换行
+ "iText的java类对于那些要产生包含文本,"
+ "表格,图形的只读文档是很有用的。它的类库尤其与java Servlet有很好的给合。"
+ "使用iText与PDF能够使你正确的控制Servlet的输出。";
Paragraph context = new Paragraph(contextString);
// 正文格式左对齐
context.setAlignment(Element.ALIGN_LEFT);
context.setFont(contextFont);
// 离上一段落(标题)空的行数
context.setSpacingBefore(5);
// 设置第一行空的列数
context.setFirstLineIndent(20);
document.add(context);

//利用类FontFactory结合Font和Color可以设置各种各样字体样式
/**
* Font.UNDERLINE 下划线,Font.BOLD 粗体
*/
Paragraph underline = new Paragraph("下划线的实现", FontFactory.getFont(
FontFactory.HELVETICA_BOLDOBLIQUE, 18, Font.UNDERLINE,
new Color(0, 0, 255)));
document.add(underline);

// 设置 Table 表格
Table aTable = new Table(3);
int width[] = {25,25,50};
aTable.setWidths(width);//设置每列所占比例
aTable.setWidth(90); // 占页面宽度 90%

aTable.setAlignment(Element.ALIGN_CENTER);//居中显示
aTable.setAlignment(Element.ALIGN_MIDDLE);//纵向居中显示
aTable.setAutoFillEmptyCells(true); //自动填满
aTable.setBorderWidth(1); //边框宽度
aTable.setBorderColor(new Color(0, 125, 255)); //边框颜色
aTable.setPadding(2);//衬距,看效果就知道什么意思了
aTable.setSpacing(3);//即单元格之间的间距
aTable.setBorder(2);//边框

//设置表头
/**
* cell.setHeader(true);是将该单元格作为表头信息显示;
* cell.setColspan(3);指定了该单元格占3列;
* 为表格添加表头信息时,要注意的是一旦表头信息添加完了之后,
* 必须调用 endHeaders()方法,否则当表格跨页后,表头信息不会再显示
*/
Cell haderCell = new Cell("表格表头");
haderCell.setHeader(true);
haderCell.setColspan(3);
aTable.addCell(haderCell);
aTable.endHeaders();

Font fontChinese = new Font(bfChinese, 12, Font.NORMAL, Color.GREEN);
Cell cell = new Cell(new Phrase("这是一个测试的 3*3 Table 数据", fontChinese ));
cell.setVerticalAlignment(Element.ALIGN_TOP);
cell.setBorderColor(new Color(255, 0, 0));
cell.setRowspan(2);
aTable.addCell(cell);

aTable.addCell(new Cell("#1"));
aTable.addCell(new Cell("#2"));
aTable.addCell(new Cell("#3"));
aTable.addCell(new Cell("#4"));
Cell cell3 = new Cell(new Phrase("一行三列数据", fontChinese ));
cell3.setColspan(3);
cell3.setVerticalAlignment(Element.ALIGN_CENTER);
aTable.addCell(cell3);

document.add(aTable);
document.add(new Paragraph("\n"));
//添加图片
Image img=Image.getInstance("d:\\img01800.jpg");
img.setAbsolutePosition(0, 0);
img.setAlignment(Image.RIGHT);//设置图片显示位置
img.scaleAbsolute(12,35);//直接设定显示尺寸
img.scalePercent(50);//表示显示的大小为原尺寸的50%
img.scalePercent(25, 12);//图像高宽的显示比例
img.setRotation(30);//图像旋转一定角度
document.add(img);

document.close();
}

/**
* @param args
*/
public static void main(String[] args) {
CreateWordDemo word = new CreateWordDemo();
String file = "c:/demo1.doc";
try {
word.createDocContext(file);
} catch (DocumentException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}

}

}

⑼ 怎么用java poi生成word表格

rt java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class XwpfTUtil {

/*String filePath = "/sta.docx";
InputStream is;
XWPFDocument doc;
Map<String, Object> params = new HashMap<String, Object>();

{
params.put("${name}", "xxx");
params.put("${sex}", "男");
params.put("${political}", "共青团员");
params.put("${place}", "sssss");
params.put("${classes}", "3102");
params.put("${id}", "213123123");
params.put("${qq}", "213123");
params.put("${tel}", "312313213");
params.put("${oldJob}", "sadasd");
params.put("${swap}", "是");
params.put("${first}", "asdasd");
params.put("${second}", "综合事务部");
params.put("${award}", "asda");
params.put("${achievement}", "完成科协网站的开发");
params.put("${advice}", "没有建议");

热点内容
网易梦之国服务器ip 发布:2024-05-05 14:06:11 浏览:34
如何设置一个通俗易懂的密码 发布:2024-05-05 13:52:21 浏览:621
新网易我的世界服务器 发布:2024-05-05 13:42:44 浏览:662
算法题写错了 发布:2024-05-05 13:34:57 浏览:804
sql按小时分组 发布:2024-05-05 13:26:25 浏览:94
张艺谋我们一家访问人 发布:2024-05-05 12:38:05 浏览:111
美版安卓系统怎么安装 发布:2024-05-05 12:37:18 浏览:920
qq邮箱缓存地址 发布:2024-05-05 12:37:16 浏览:986
电位算法 发布:2024-05-05 12:36:01 浏览:727
我的世界清风斗罗大陆服务器地址 发布:2024-05-05 12:35:50 浏览:453