當前位置:首頁 » 編程語言 » javapoiword

javapoiword

發布時間: 2022-12-08 15:03:34

java POI 如何操作word 格式

1、環境支持

1.1 添加poi支持:包下載地址http://www.apache.org/dyn/closer.cgi/poi/release/

1.2 POI對Excel文件的讀取操作比較方便,POI還提供對Word的DOC格式文件的讀取。但在它的發行版本中沒有發布對Word支持的模塊,需要另外下載一個POI的擴展的Jar包。下載地址為http://www.ibiblio.org/maven2/org/textmining/tm-extractors/0.4/ 下載extractors-0.4_zip這個文件

package com.ray.poi.util;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.textmining.text.extraction.WordExtractor;

/**
* 讀寫doc
* @author wangzonghao
*
*/
public class POIWordUtil {
/**
* 讀入doc
* @param doc
* @return
* @throws Exception
*/
public static String readDoc(String doc) throws Exception {
// 創建輸入流讀取DOC文件
FileInputStream in = new FileInputStream(new File(doc));
WordExtractor extractor = null;
String text = null;
// 創建WordExtractor
extractor = new WordExtractor();
// 對DOC文件進行提取
text = extractor.extractText(in);
return text;
}
/**
* 寫出doc
* @param path
* @param content
* @return
*/
public static boolean writeDoc(String path, String content) {
boolean w = false;
try {

// byte b[] = content.getBytes("ISO-8859-1");
byte b[] = content.getBytes();

ByteArrayInputStream s = new ByteArrayInputStream(b);

POIFSFileSystem fs = new POIFSFileSystem();
DirectoryEntry directory = fs.getRoot();

DocumentEntry de = directory.createDocument("WordDocument", s);

FileOutputStream ostream = new FileOutputStream(path);

fs.writeFilesystem(ostream);

s.close();
ostream.close();

} catch (IOException e) {
e.printStackTrace();
}
return w;
}

}
測試

package com.ray.poi.util;

import junit.framework.TestCase;

public class POIUtilTest extends TestCase {

public void testReadDoc() {
try{
String text = POIWordUtil.readDoc("E:/work_space/poi/com/ray/poi/util/demo.doc");
System.out.println(text);
}catch(Exception e){
e.printStackTrace();
}

}

public void testWriteDoc() {
String wr;
try {
wr = POIWordUtil.readDoc("E:/work_space/poi/com/ray/poi/util/demo.doc");

boolean b = POIWordUtil.writeDoc("c:\\demo.doc",wr);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

}

}

⑵ JAVA使用POI讀寫word 亂碼



public static void main(String args[])
throws Exception
{
XWPFDocument doc = new XWPFDocument();
XWPFParagraph p1 = doc.createParagraph();
p1.setAlignment(ParagraphAlignment.CENTER);
p1.setBorderBottom(Borders.DOUBLE);
p1.setBorderTop(Borders.DOUBLE);
p1.setBorderRight(Borders.DOUBLE);
p1.setBorderLeft(Borders.DOUBLE);
p1.setBorderBetween(Borders.SINGLE);
p1.setVerticalAlignment(TextAlignment.TOP);
XWPFRun r1 = p1.createRun();
r1.setBold(true);
r1.setText("The quick brown fox");
r1.setBold(true);
r1.setFontFamily("Courier");
r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH);
r1.setTextPosition(100);
XWPFParagraph p2 = doc.createParagraph();
p2.setAlignment(ParagraphAlignment.RIGHT);
p2.setBorderBottom(Borders.DOUBLE);
p2.setBorderTop(Borders.DOUBLE);
p2.setBorderRight(Borders.DOUBLE);
p2.setBorderLeft(Borders.DOUBLE);
p2.setBorderBetween(Borders.SINGLE);
XWPFRun r2 = p2.createRun();
r2.setText("jumped over the lazy dog");
r2.setStrike(true);
r2.setFontSize(20);
XWPFRun r3 = p2.createRun();
r3.setText("and went away");
r3.setStrike(true);
r3.setFontSize(20);
r3.setSubscript(VerticalAlign.SUPERSCRIPT);
XWPFParagraph p3 = doc.createParagraph();
p3.setWordWrap(true);
p3.setPageBreak(true);
p3.setAlignment(ParagraphAlignment.BOTH);
p3.setSpacingLineRule(LineSpacingRule.EXACT);
p3.setIndentationFirstLine(600);
XWPFRun r4 = p3.createRun();
r4.setTextPosition(20);
r4.setText("To be, or not to be: that is the question: Whether 'tis nobler in the mind to suffer The slings and arrows of outrageous fortune, Or to take arms against a sea of troubles, And by opposing end them? To die: to sleep; ");
r4.addBreak(BreakType.PAGE);
r4.setText("No more; and by a sleep to say we end The heart-ache and the thousand natural shocks That flesh is heir to, 'tis a consummation Devoutly to be wish'd. To die, to sleep; To sleep: perchance to dream: ay, there's the rub; .......");
r4.setItalic(true);
XWPFRun r5 = p3.createRun();
r5.setTextPosition(-10);
r5.setText("For in that sleep of death what dreams may come");
r5.addCarriageReturn();
r5.setText("When we have shuffled off this mortal coil,Must give us pause: there's the respectThat makes calamity of so long life;");
r5.addBreak();
r5.setText("For who would bear the whips and scorns of time,The oppressor's wrong, the proud man's contumely,");
r5.addBreak(BreakClear.ALL);
r5.setText("The pangs of despised love, the law's delay,The insolence of office and the spurns.......");
FileOutputStream out = new FileOutputStream("simple.docx");
doc.write(out);
out.close();
}

⑶ java 中用poi讀取word和用docx4j讀取word

不知道你是具體讀取Word裡面的什麼元素,下面以讀取文字和圖片為例吧,兩個代碼示例,你參考看看:

  1. 讀取文本

import com.spire.doc.Document;

import java.io.FileWriter;

import java.io.IOException;

public class ExtractText {

public static void main(String[] args) throws IOException {

//載入Word文檔
Document document = new Document();
document.loadFromFile("C:\Users\Administrator\Desktop\sample.docx");

//獲取文檔中的文本保存為String
String text=document.getText();

//將String寫入Txt文件
writeStringToTxt(text,"ExtractedText.txt");
}

public static void writeStringToTxt(String content, String txtFileName) throws IOException {

FileWriter fWriter= new FileWriter(txtFileName,true);
try {
fWriter.write(content);
}catch(IOException ex){
ex.printStackTrace();
}finally{
try{
fWriter.flush();
fWriter.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}

}


2. 讀取圖片


import com.spire.doc.Document;

import com.spire.doc.documents.DocumentObjectType;

import com.spire.doc.fields.DocPicture;

import com.spire.doc.interfaces.ICompositeObject;

import com.spire.doc.interfaces.IDocumentObject;

import javax.imageio.ImageIO;

import java.awt.image.BufferedImage;

import java.io.File;

import java.io.IOException;

import java.util.ArrayList;

import java.util.LinkedList;

import java.util.List;

import java.util.Queue;

public class ExtractImages {

public static void main(String[] args) throws IOException {

//載入Word文檔

Document document = new Document();

document.loadFromFile("C:\Users\Administrator\Desktop\sample.docx");

//創建Queue對象

Queue nodes = new LinkedList();

nodes.add(document);

//創建List對象

List images = new ArrayList();

//遍歷文檔中的子對象

while (nodes.size() > 0) {

ICompositeObject node = nodes.poll();

for (int i = 0; i < node.getChildObjects().getCount(); i++) {

IDocumentObject child = node.getChildObjects().get(i);

if (child instanceof ICompositeObject) {

nodes.add((ICompositeObject) child);

//獲取圖片並添加到List

if (child.getDocumentObjectType() == DocumentObjectType.Picture) {

DocPicture picture = (DocPicture) child;

images.add(picture.getImage());

}

}

}

}

//將圖片保存為PNG格式文件

for (int i = 0; i < images.size(); i++) {

File file = new File(String.format("output/圖片-%d.png", i));

ImageIO.write(images.get(i), "PNG", file);

}

}

}

注意這里使用的jar包是spire.doc.jar,需要在java程序中先導入jar文件。

⑷ Java 利用poi 可以直接讀取word中的表格保持樣式生成新的word么

1.讀取word
2003及word
2007需要的jar包
讀取
2003
版本(.doc)的word文件相對來說比較簡單,只需要
poi-3.5-beta6-20090622.jar

poi-scratchpad-3.5-beta6-20090622.jar
兩個
jar
包即可,

2007
版本(.docx)就麻煩多,我說的這個麻煩不是我們寫代碼的時候麻煩,是要導入的
jar
包比較的多,有如下
7
個之多:
1.
openxml4j-bin-beta.jar
2.
poi-3.5-beta6-20090622.jar
3.
poi-ooxml-3.5-beta6-20090622.jar
4
.dom4j-1.6.1.jar
5.
geronimo-stax-api_1.0_spec-1.0.jar
6.
ooxml-schemas-1.0.jar
7.
xmlbeans-2.3.0.jar
其中
4-7

poi-ooxml-3.5-beta6-20090622.jar
所依賴的
jar
包(在
poi-bin-3.5-beta6-20090622.tar.gz
中的
ooxml-lib
目錄下可以找到)。
2.換行符號
硬換行:文件中換行,如果是鍵盤中使用了"enter"的換行。
軟換行:文件中一行的字元數容量有限,當字元數量超過一定值時,會自動切到下行顯示。
對程序來說,硬換行才是可以識別的、確定的換行,軟換行與字體大小、縮進有關。
3.讀取的注意事項
值得注意的是:
POI
在讀取不會讀取
word
文件中的圖片信息;
還有就是對於
2007
版的
word(.docx),
如果
word
文件中有表格,所有表格中的數據都會在讀取出來的字元串的最後。
4.讀取word文本內容代碼
1
import
java.io.File;
2
import
java.io.FileInputStream;
3
import
java.io.InputStream;
4
5
import
org.apache.poi.POIXMLDocument;
6
import
org.apache.poi.POIXMLTextExtractor;
7
import
org.apache.poi.hwpf.extractor.WordExtractor;
8
import
org.apache.poi.openxml4j.opc.OPCPackage;
9
import
org.apache.poi.xwpf.extractor.XWPFWordExtractor;
10
11
public
class
Test
{
12
public
static
void
main(String[]
args)
{
13
try
{
14
InputStream
is
=
new
FileInputStream(new
File("2003.doc"));
15
WordExtractor
ex
=
new
WordExtractor(is);
16
String
text2003
=
ex.getText();
17
System.out.println(text2003);
18
19
OPCPackage
opcPackage
=
POIXMLDocument.openPackage("2007.docx");
20
POIXMLTextExtractor
extractor
=
new
XWPFWordExtractor(opcPackage);
21
String
text2007
=
extractor.getText();
22
System.out.println(text2007);
23
24
}
catch
(Exception
e)
{
25
e.printStackTrace();
26
}
27
}
28
}

⑸ 如何使用JAVA,POI讀寫word文檔

public class HwpfTest {

@SuppressWarnings("deprecation")
@Test
public void testReadByExtractor() throws Exception {
InputStream is = new FileInputStream("D:\\test.doc");
WordExtractor extractor = new WordExtractor(is);
//輸出word文檔所有的文本
System.out.println(extractor.getText());
System.out.println(extractor.getTextFromPieces());
//輸出頁眉的內容
System.out.println("頁眉:" + extractor.getHeaderText());
//輸出頁腳的內容
System.out.println("頁腳:" + extractor.getFooterText());
//輸出當前word文檔的元數據信息,包括作者、文檔的修改時間等。
System.out.println(extractor.getMetadataTextExtractor().getText());
//獲取各個段落的文本
String paraTexts[] = extractor.getParagraphText();
for (int i=0; i<paraTexts.length; i++) {
System.out.println("Paragraph " + (i+1) + " : " + paraTexts[i]);
}
//輸出當前word的一些信息
printInfo(extractor.getSummaryInformation());
//輸出當前word的一些信息
this.printInfo(extractor.getDocSummaryInformation());
this.closeStream(is);
}

/**
* 輸出SummaryInfomation
* @param info
*/
private void printInfo(SummaryInformation info) {
//作者
System.out.println(info.getAuthor());
//字元統計
System.out.println(info.getCharCount());
//頁數
System.out.println(info.getPageCount());
//標題
System.out.println(info.getTitle());
//主題
System.out.println(info.getSubject());
}

/**
* 輸出DocumentSummaryInfomation
* @param info
*/
private void printInfo(DocumentSummaryInformation info) {
//分類
System.out.println(info.getCategory());
//公司
System.out.println(info.getCompany());
}

/**
* 關閉輸入流
* @param is
*/
private void closeStream(InputStream is) {
if (is != null) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}

}

⑹ Java poi操作word, 插入文字會多出一個換行

目測應該是多添加了段落,試著獲取原有單元格中的段落然後添加文本

⑺ 怎麼使用JAVA,POI讀寫word文檔

如何使用JAVA、POI讀寫word文檔??
能不能將一個word的內容完全讀過來,放到一個新生成的word文件中去,要求能將word中的表格、圖片等保留,格式不變。最好能給個例子?網上多是很早以前的那個解決方法如下:,只能讀文本內容,且新生成的word文件打開時總是要提示選擇編碼,不太好用,希望能有新的解決方案??!!

poi操作word
1.1 添加poi支持:包下載地址

1.2 POI對Excel文件的讀取操作比較方便,POI還提供對Word的DOC格式文件的讀取。但在它的發行版本中沒有發布對Word支持的模塊,需要另外下載一個POI的擴展的Jar包。下載地址為;下載extractors-0.4_zip這個文件

2、提取Doc文件內容

public static String readDoc(String doc) throws Exception {
// 創建輸入流讀取DOC文件
FileInputStream in = new FileInputStream(new File(doc));
WordExtractor extractor = null;
String text = null;
// 創建WordExtractor
extractor = new WordExtractor();
// 對DOC文件進行提取
text = extractor.extractText(in);
return text;
}

public static void main(String[] args) {
try{
String text = WordReader.readDoc("c:/test.doc");
System.out.println(text);
}catch(Exception e){
e.printStackTrace();
}
}

3、寫入Doc文檔

import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public class WordWriter {
public static boolean writeDoc(String path, String content) {
boolean w = false;
try {

// byte b[] = content.getBytes("ISO-8859-1");
byte b[] = content.getBytes();

ByteArrayInputStream s = new ByteArrayInputStream(b);

POIFSFileSystem fs = new POIFSFileSystem();
DirectoryEntry directory = fs.getRoot();

DocumentEntry de = directory.createDocument("WordDocument", s);

FileOutputStream ostream = new FileOutputStream(path);

fs.writeFilesystem(ostream);

s.close();
ostream.close();

} catch (IOException e) {
e.printStackTrace();
}
return w;
}
public static void main(String[] args) throws Exception{
String wr=WordReader.readDoc("D:\\test.doc");
boolean b = writeDoc("D:\\result.doc",wr);

⑻ java poi 怎麼控制輸出word每行的內容

你好,試試以下代碼行不行。
package com.sample;

import java.awt.Color;
import java.io.FileOutputStream;
import java.io.IOException;

import com.lowagie.text.Cell;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Element;
import com.lowagie.text.Font;
import com.lowagie.text.FontFactory;
import com.lowagie.text.Image;
import com.lowagie.text.PageSize;
import com.lowagie.text.Paragraph;
import com.lowagie.text.Phrase;
import com.lowagie.text.Table;
import com.lowagie.text.pdf.BaseFont;
import com.lowagie.text.rtf.RtfWriter2;

/**
*
* @author wangyanjun
* @email [email protected]
* @createDate Jun 12, 2008
*/
public class CreateWordDemo {

public void createDocContext(String file) throws DocumentException,
IOException {
// 設置紙張大小
Document document = new Document(PageSize.A4);
// 建立一個書寫器(Writer)與document對象關聯,通過書寫器(Writer)可以將文檔寫入到磁碟中
RtfWriter2.getInstance(document, new FileOutputStream(file));
document.open();
// 設置中文字體
BaseFont bfChinese = BaseFont.createFont("STSongStd-Light",
"UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
// 標題字體風格
Font titleFont = new Font(bfChinese, 12, Font.BOLD);
// 正文字體風格
Font contextFont = new Font(bfChinese, 10, Font.NORMAL);
Paragraph title = new Paragraph("標題");
// 設置標題格式對齊方式
title.setAlignment(Element.ALIGN_CENTER);
title.setFont(titleFont);
document.add(title);

String contextString = "iText是一個能夠快速產生PDF文件的java類庫。"
+ " \n"// 換行
+ "iText的java類對於那些要產生包含文本,"
+ "表格,圖形的只讀文檔是很有用的。它的類庫尤其與java Servlet有很好的給合。"
+ "使用iText與PDF能夠使你正確的控制Servlet的輸出。";
Paragraph context = new Paragraph(contextString);
// 正文格式左對齊
context.setAlignment(Element.ALIGN_LEFT);
context.setFont(contextFont);
// 離上一段落(標題)空的行數
context.setSpacingBefore(5);
// 設置第一行空的列數
context.setFirstLineIndent(20);
document.add(context);

//利用類FontFactory結合Font和Color可以設置各種各樣字體樣式
/**
* Font.UNDERLINE 下劃線,Font.BOLD 粗體
*/
Paragraph underline = new Paragraph("下劃線的實現", FontFactory.getFont(
FontFactory.HELVETICA_BOLDOBLIQUE, 18, Font.UNDERLINE,
new Color(0, 0, 255)));
document.add(underline);

// 設置 Table 表格
Table aTable = new Table(3);
int width[] = {25,25,50};
aTable.setWidths(width);//設置每列所佔比例
aTable.setWidth(90); // 占頁面寬度 90%

aTable.setAlignment(Element.ALIGN_CENTER);//居中顯示
aTable.setAlignment(Element.ALIGN_MIDDLE);//縱向居中顯示
aTable.setAutoFillEmptyCells(true); //自動填滿
aTable.setBorderWidth(1); //邊框寬度
aTable.setBorderColor(new Color(0, 125, 255)); //邊框顏色
aTable.setPadding(2);//襯距,看效果就知道什麼意思了
aTable.setSpacing(3);//即單元格之間的間距
aTable.setBorder(2);//邊框

//設置表頭
/**
* cell.setHeader(true);是將該單元格作為表頭信息顯示;
* cell.setColspan(3);指定了該單元格佔3列;
* 為表格添加表頭信息時,要注意的是一旦表頭信息添加完了之後,
* 必須調用 endHeaders()方法,否則當表格跨頁後,表頭信息不會再顯示
*/
Cell haderCell = new Cell("表格表頭");
haderCell.setHeader(true);
haderCell.setColspan(3);
aTable.addCell(haderCell);
aTable.endHeaders();

Font fontChinese = new Font(bfChinese, 12, Font.NORMAL, Color.GREEN);
Cell cell = new Cell(new Phrase("這是一個測試的 3*3 Table 數據", fontChinese ));
cell.setVerticalAlignment(Element.ALIGN_TOP);
cell.setBorderColor(new Color(255, 0, 0));
cell.setRowspan(2);
aTable.addCell(cell);

aTable.addCell(new Cell("#1"));
aTable.addCell(new Cell("#2"));
aTable.addCell(new Cell("#3"));
aTable.addCell(new Cell("#4"));
Cell cell3 = new Cell(new Phrase("一行三列數據", fontChinese ));
cell3.setColspan(3);
cell3.setVerticalAlignment(Element.ALIGN_CENTER);
aTable.addCell(cell3);

document.add(aTable);
document.add(new Paragraph("\n"));
//添加圖片
Image img=Image.getInstance("d:\\img01800.jpg");
img.setAbsolutePosition(0, 0);
img.setAlignment(Image.RIGHT);//設置圖片顯示位置
img.scaleAbsolute(12,35);//直接設定顯示尺寸
img.scalePercent(50);//表示顯示的大小為原尺寸的50%
img.scalePercent(25, 12);//圖像高寬的顯示比例
img.setRotation(30);//圖像旋轉一定角度
document.add(img);

document.close();
}

/**
* @param args
*/
public static void main(String[] args) {
CreateWordDemo word = new CreateWordDemo();
String file = "c:/demo1.doc";
try {
word.createDocContext(file);
} catch (DocumentException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}

}

}

⑼ 怎麼用java poi生成word表格

rt java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class XwpfTUtil {

/*String filePath = "/sta.docx";
InputStream is;
XWPFDocument doc;
Map<String, Object> params = new HashMap<String, Object>();

{
params.put("${name}", "xxx");
params.put("${sex}", "男");
params.put("${political}", "共青團員");
params.put("${place}", "sssss");
params.put("${classes}", "3102");
params.put("${id}", "213123123");
params.put("${qq}", "213123");
params.put("${tel}", "312313213");
params.put("${oldJob}", "sadasd");
params.put("${swap}", "是");
params.put("${first}", "asdasd");
params.put("${second}", "綜合事務部");
params.put("${award}", "asda");
params.put("${achievement}", "完成科協網站的開發");
params.put("${advice}", "沒有建議");

熱點內容
零基礎編程入門 發布:2023-02-07 00:56:43 瀏覽:816
linux虛擬機上外網訪問 發布:2023-02-07 00:54:20 瀏覽:736
紅米訪問限制設置 發布:2023-02-07 00:52:04 瀏覽:108
上傳大文件網盤 發布:2023-02-07 00:51:06 瀏覽:266
上傳cdn文件 發布:2023-02-07 00:49:48 瀏覽:613
linux裁剪 發布:2023-02-07 00:45:40 瀏覽:875
安卓轉產品經理怎麼樣 發布:2023-02-07 00:45:35 瀏覽:706
智能防盜鎖加密卡怎麼配置 發布:2023-02-07 00:45:33 瀏覽:915
ftp切換被動模式 發布:2023-02-07 00:45:33 瀏覽:104
文件加密後打不開 發布:2023-02-07 00:44:21 瀏覽:314