Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion pkg/document/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ import (
"github.com/zerx-lab/wordZero/pkg/style"
)

const (
wordprocessingMLNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
wordprocessingMLStrictNamespace = "http://purl.oclc.org/ooxml/wordprocessingml/main"
)

// Document 表示一个Word文档
type Document struct {
// 文档的主要内容
Expand Down Expand Up @@ -2016,21 +2021,42 @@ func (d *Document) parseDocument() error {

switch t := token.(type) {
case xml.StartElement:
if t.Name.Local == "document" && t.Name.Space == "http://schemas.openxmlformats.org/wordprocessingml/2006/main" {
if isWordprocessingDocumentElement(t) {
// 开始解析文档
if err := d.parseDocumentElement(decoder); err != nil {
return err
}
goto done
}
if t.Name.Local == "document" {
return WrapError("parse_document", fmt.Errorf("%w: unsupported document namespace %q", ErrInvalidDocument, t.Name.Space))
}
}
}

return WrapError("parse_document", fmt.Errorf("%w: missing document root", ErrInvalidDocument))

done:
if d.Body == nil {
return WrapError("parse_document", fmt.Errorf("%w: missing document body", ErrInvalidDocument))
}
Infof("解析完成,共 %d 个元素", len(d.Body.Elements))
return nil
}

func isWordprocessingDocumentElement(start xml.StartElement) bool {
if start.Name.Local != "document" {
return false
}

switch start.Name.Space {
case wordprocessingMLNamespace, wordprocessingMLStrictNamespace:
return true
default:
return false
}
}

// parseDocumentElement 解析文档元素
func (d *Document) parseDocumentElement(decoder *xml.Decoder) error {
// 初始化Body
Expand Down
75 changes: 75 additions & 0 deletions pkg/document/document_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package document

import (
"archive/zip"
"bytes"
"errors"
"io"
"os"
"testing"

Expand Down Expand Up @@ -958,6 +962,77 @@ func TestDocumentOpenFromMemory(t *testing.T) {
}
}

func TestDocumentOpenFromMemoryWithStrictNamespace(t *testing.T) {
files := buildTestDocxReader(t, `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://purl.oclc.org/ooxml/wordprocessingml/main">
<w:body>
<w:p>
<w:r>
<w:t>Strict 文档</w:t>
</w:r>
</w:p>
</w:body>
</w:document>`)

doc, err := OpenFromMemory(files)
if err != nil {
t.Fatalf("Failed to open strict namespace document: %v", err)
}

paragraphs := doc.Body.GetParagraphs()
if len(paragraphs) != 1 {
t.Fatalf("Expected 1 paragraph, got %d", len(paragraphs))
}

if got := paragraphs[0].Runs[0].Text.Content; got != "Strict 文档" {
t.Fatalf("Expected strict namespace text to be parsed, got %q", got)
}
}

func TestDocumentOpenFromMemoryWithInvalidRootReturnsError(t *testing.T) {
defer func() {
if r := recover(); r != nil {
t.Fatalf("OpenFromMemory should return an error instead of panicking: %v", r)
}
}()

files := buildTestDocxReader(t, `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<document xmlns="https://example.com/custom-wordprocessingml">
<body/>
</document>`)

_, err := OpenFromMemory(files)
if err == nil {
t.Fatal("Expected invalid document root to return an error")
}

if !errors.Is(err, ErrInvalidDocument) {
t.Fatalf("Expected ErrInvalidDocument, got %v", err)
}
}

func buildTestDocxReader(t *testing.T, documentXML string) io.ReadCloser {
t.Helper()

var buf bytes.Buffer
zipWriter := zip.NewWriter(&buf)

fileWriter, err := zipWriter.Create("word/document.xml")
if err != nil {
t.Fatalf("Failed to create document.xml entry: %v", err)
}

if _, err := fileWriter.Write([]byte(documentXML)); err != nil {
t.Fatalf("Failed to write document.xml entry: %v", err)
}

if err := zipWriter.Close(); err != nil {
t.Fatalf("Failed to close test DOCX writer: %v", err)
}

return io.NopCloser(bytes.NewReader(buf.Bytes()))
}

// TestAddPageBreak 测试添加分页符功能
func TestAddPageBreak(t *testing.T) {
doc := New()
Expand Down