Unified guidance for working with structured documents (DOCX, XLSX, PDF) in SEA-Forge™.
SEA-Forge™ provides structured document capabilities for:
All operations preserve semantic structure and support traceability to Knowledge Graph concepts.
| Format | Read | Write | Style | Templates |
|---|---|---|---|---|
| DOCX | ✅ | ✅ | ✅ | ✅ |
| XLSX | ✅ | ✅ | ✅ | ✅ |
| PDF | ✅ | ⚠️ | ❌ | ❌ |
⚠️ = PDF generation via DOCX → PDF export only
Use Cases:
Key Features:
Use Cases:
Key Features:
Use Cases:
Key Features:
Limitations:
1
2
3
4
5
6
# The distribution is named "python-docx", but the importable package is
# `docx` (a hyphen is not valid in a Python module name).
from docx import Document

# Build a new document with a heading and one paragraph, then write it out.
doc = Document()
doc.add_heading('SEA-Forge™ ADR', 0)
doc.add_paragraph('Decision: Adopt Knowledge Graph')
doc.save('adr-001.docx')
1
2
3
4
5
6
7
from openpyxl import Workbook
# Start a new workbook and work on its active worksheet.
wb = Workbook()
ws = wb.active
# Header cells for a two-column sheet.
ws['A1'] = 'Concept'
ws['B1'] = 'Status'
# Write the workbook to disk.
wb.save('traceability.xlsx')
1
2
3
4
5
# The distribution is named "python-docx", but the importable package is
# `docx` (a hyphen is not valid in a Python module name).
from docx import Document

# Open an existing document and print the text of each paragraph.
doc = Document('existing.docx')
for paragraph in doc.paragraphs:
    print(paragraph.text)
1
2
3
4
5
6
from openpyxl import load_workbook

# Open the workbook and print each row of the active sheet (cell values only).
wb = load_workbook('data.xlsx')
ws = wb.active
for row_values in ws.iter_rows(values_only=True):
    print(row_values)
1
2
3
4
5
import pypdf

# Print the extracted text of each page in turn.
reader = pypdf.PdfReader('source.pdf')
for pdf_page in reader.pages:
    page_text = pdf_page.extract_text()
    print(page_text)
1
2
3
4
5
6
7
8
9
# The distribution is named "python-docx", but the importable package is
# `docx`, so its submodules are `docx.shared`, `docx.enum.text`, etc.
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

# Assumes `doc` is a Document opened or created earlier.
paragraph = doc.add_paragraph('Important Text')
# Character formatting is applied to the first run of the new paragraph.
run = paragraph.runs[0]
run.bold = True
run.font.size = Pt(14)
run.font.color.rgb = RGBColor(0, 0, 255)
# Alignment is a paragraph-level setting.
paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
1
2
3
4
5
from openpyxl.styles import Font, PatternFill, Alignment
# Style cell A1 of the worksheet `ws`: bold size-14 font, solid FFFF00 fill,
# horizontally centred content.
ws['A1'].font = Font(bold=True, size=14)
ws['A1'].fill = PatternFill(start_color='FFFF00', fill_type='solid')
ws['A1'].alignment = Alignment(horizontal='center')
1
2
3
4
5
6
7
# Add a 3x3 table with a named table style; the first row holds the headings.
table = doc.add_table(rows=3, cols=3)
table.style = 'Light Grid Accent 1'
header_cells = table.rows[0].cells
for cell, label in zip(header_cells, ('Concept', 'Status', 'Owner')):
    cell.text = label
1
2
3
4
5
6
7
8
from openpyxl.worksheet.table import Table, TableStyleInfo

# Header row plus one record.
# NOTE(review): the table range A1:C2 assumes `ws` was empty before these
# rows were appended, so that they land in rows 1-2 — confirm.
for row in (
    ['Concept', 'Status', 'Owner'],
    ['BoundedContext', 'Active', 'TeamA'],
):
    ws.append(row)

concept_table = Table(displayName='ConceptTable', ref='A1:C2')
concept_table.tableStyleInfo = TableStyleInfo(name='TableStyleMedium2')
ws.add_table(concept_table)
1
2
3
# Formulas are assigned as strings that begin with "=".
ws['D2'] = '=SUM(B2:C2)'  # sum of B2 and C2
ws['D3'] = '=AVERAGE(B2:B10)'  # average of B2 through B10
ws['D4'] = '=IF(B2>100,"High","Low")'  # "High" when B2 exceeds 100, otherwise "Low"
1
2
3
4
5
6
7
8
from openpyxl.chart import BarChart, Reference

# Column B (rows 1-10) supplies the series, with its first row taken as the
# series title; column A (rows 2-10) supplies the category labels.
values_ref = Reference(ws, min_col=2, min_row=1, max_row=10)
labels_ref = Reference(ws, min_col=1, min_row=2, max_row=10)

bar_chart = BarChart()
bar_chart.add_data(values_ref, titles_from_data=True)
bar_chart.set_categories(labels_ref)
ws.add_chart(bar_chart, 'E5')  # anchor the chart at cell E5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def export_adr_to_docx(adr_md_path, output_docx_path):
    """Convert ADR markdown to formatted DOCX.

    Args:
        adr_md_path: Path of the ADR markdown file to read.
        output_docx_path: Path the generated .docx file is written to.
    """
    import markdown
    # The distribution is named "python-docx", but the importable package is
    # `docx` (a hyphen is not valid in a Python module name).
    from docx import Document

    # Read markdown. Decode explicitly rather than relying on the platform
    # default encoding, since the content is not pure ASCII (e.g. "™").
    # NOTE(review): assumes ADR files are stored as UTF-8 — confirm.
    with open(adr_md_path, 'r', encoding='utf-8') as f:
        md_content = f.read()

    # Parse sections
    doc = Document()
    doc.add_heading('SEA-Forge™ Architecture Decision Record', 0)

    # Add sections (title, status, context, decision, consequences)
    # [Implementation details omitted for brevity]
    # NOTE(review): `markdown` and `md_content` are presumably consumed by the
    # omitted section above; nothing shown here uses them yet.
    doc.save(output_docx_path)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def generate_traceability_matrix(mapping_data, output_xlsx_path):
    """Write a traceability matrix to an .xlsx workbook.

    Args:
        mapping_data: Iterable of mappings, each providing 'adr' plus the
            iterables of strings 'prds' and 'sdss'.
        output_xlsx_path: Path the workbook is saved to.
    """
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = 'Traceability'

    # Row 1: bold column titles on a solid fill.
    column_titles = ('ADR ID', 'PRD IDs', 'SDS IDs', 'Coverage')
    for col, title in enumerate(column_titles, start=1):
        header_cell = sheet.cell(row=1, column=col, value=title)
        header_cell.font = Font(bold=True)
        header_cell.fill = PatternFill(start_color='366092', fill_type='solid')

    # Data rows start directly below the header.
    for offset, entry in enumerate(mapping_data):
        row = offset + 2
        row_values = (
            entry['adr'],
            ', '.join(entry['prds']),
            ', '.join(entry['sdss']),
            # Coverage shows a check mark whenever the SDS column is non-empty.
            f"=IF(C{row}<>\"\", \"✓\", \"\")",
        )
        for col, value in enumerate(row_values, start=1):
            sheet.cell(row=row, column=col, value=value)

    workbook.save(output_xlsx_path)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
def extract_pdf_for_indexing(pdf_path):
    """Collect a PDF's metadata and per-page content for Knowledge Graph indexing.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A dict with 'metadata' (the reader's metadata object) and 'pages', a
        list with one dict per page holding 'page_num' (1-based), 'text' and
        'links'.
    """
    import pypdf

    pdf = pypdf.PdfReader(pdf_path)
    document_info = pdf.metadata

    # NOTE(review): 'links' is the page's raw '/Annots' entry (an empty list
    # when the key is absent), not a list of resolved URLs — confirm that this
    # is what the indexer expects.
    page_records = [
        {
            'page_num': number,
            'text': pdf_page.extract_text(),
            'links': pdf_page.get('/Annots', []),
        }
        for number, pdf_page in enumerate(pdf.pages, start=1)
    ]
    return {'metadata': document_info, 'pages': page_records}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def generate_metrics_dashboard(metrics, output_xlsx_path):
    """Write sprint metrics and a line chart of them to an .xlsx workbook.

    Args:
        metrics: Sequence of mappings with the keys 'date', 'velocity',
            'quality' and 'coverage'.
        output_xlsx_path: Path the workbook is saved to.
    """
    from openpyxl import Workbook
    from openpyxl.chart import LineChart, Reference

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = 'Metrics'

    # Header row followed by one row per metric entry.
    field_names = ('date', 'velocity', 'quality', 'coverage')
    sheet.append(['Date', 'Velocity', 'Quality', 'Coverage'])
    for entry in metrics:
        sheet.append([entry[name] for name in field_names])

    # Columns B-D (including the header row, used for series titles) are the
    # plotted data; column A below the header supplies the category labels.
    last_row = len(metrics) + 1
    series_ref = Reference(sheet, min_col=2, min_row=1, max_col=4, max_row=last_row)
    labels_ref = Reference(sheet, min_col=1, min_row=2, max_row=last_row)

    line_chart = LineChart()
    line_chart.title = 'Sprint Metrics'
    line_chart.add_data(series_ref, titles_from_data=True)
    line_chart.set_categories(labels_ref)
    sheet.add_chart(line_chart, 'F5')

    workbook.save(output_xlsx_path)
Link document content to Knowledge Graph concepts:
1
2
3
# Embed ConceptId as document property
# `subject` carries the concept identifier; `keywords` carries a
# comma-separated list of identifiers (here ADR/SDS/PRD IDs).
doc.core_properties.subject = 'sea:BoundedContext'
doc.core_properties.keywords = 'ADR-021, SDS-012, PRD-026'
Use templates for consistency:
1
2
3
4
5
# The distribution is named "python-docx", but the importable package is
# `docx` (a hyphen is not valid in a Python module name).
from docx import Document

# Open the template, edit it, and save the result under a new name so the
# template file itself is left untouched.
template = Document('templates/adr-template.docx')
# Modify template placeholders
template.save('adr-new.docx')
Track document versions:
1
2
3
# `datetime` must be imported for the timestamp below.
from datetime import datetime, timezone

doc.core_properties.version = '1.0.0'
doc.core_properties.revision = 1
# Use a timezone-aware UTC timestamp rather than naive local time.
# NOTE(review): core-property timestamps are presumably serialized as UTC —
# confirm against the python-docx version in use.
doc.core_properties.modified = datetime.now(timezone.utc)
Ensure documents are accessible:
1
2
3
4
5
6
# Add alt text to images
# Document.add_picture() accepts only the image plus optional width/height,
# so the description is written to the inserted picture's docPr element
# (its "descr" attribute) afterwards.
# NOTE(review): `_inline` is a private python-docx attribute — confirm it is
# available in the pinned version.
picture = doc.add_picture('diagram.png')
picture._inline.docPr.set('descr', 'Architecture diagram showing bounded contexts')
# Use proper heading hierarchy
doc.add_heading('Section 1', level=1)
doc.add_heading('Subsection 1.1', level=2)
Symptom: Style changes don’t appear in output
Solution:
1
2
3
4
5
6
7
# Ensure style exists in template
# The distribution is named "python-docx", but the importable package is
# `docx`, so its submodules are `docx.oxml.shared`, `docx.enum.style`, etc.
from docx.oxml.shared import OxmlElement
from docx.enum.style import WD_STYLE_TYPE


def add_style_if_missing(doc, style_name, style_type=WD_STYLE_TYPE.PARAGRAPH):
    """Add *style_name* to *doc* when the document does not define it yet.

    Args:
        doc: The python-docx Document to check.
        style_name: Name of the style that must exist.
        style_type: Kind of style to create when it is missing; defaults to
            a paragraph style.
    """
    if style_name not in doc.styles:
        # Add style programmatically
        doc.styles.add_style(style_name, style_type)
Symptom: Formula shows as text, not result
Solution:
1
2
3
# Set cell data type explicitly
# Assign the formula string first, then mark the cell's data type.
ws['D2'].value = '=SUM(B2:C2)'
ws['D2'].data_type = 'f' # 'f' for formula
Symptom: Missing text or garbled output
Solution:
1
2
3
4
5
6
# Try alternative PDF library
import pdfplumber

# The context manager closes the PDF once extraction is finished.
with pdfplumber.open('document.pdf') as pdf:
    for pdf_page in pdf.pages:
        # Preserve layout
        text = pdf_page.extract_text(layout=True)