Add cover pages to transcript/summary .docx with AI-generated descriptions; include combined .docx when both requested
This commit is contained in:
+105
-45
@@ -284,25 +284,13 @@ def send_email(
|
||||
raise EmailError(f"Failed to send email: {e}")
|
||||
|
||||
|
||||
def create_transcript_docx(text: str, filename: str):
|
||||
"""
|
||||
Create a .docx transcript with:
|
||||
- 1.5" left margin, 1" right margin
|
||||
- 12pt Courier
|
||||
- Continuous line numbering on the left
|
||||
- Speaker names capitalized and indented; spoken text further indented
|
||||
- No section headings; use bold/underline only.
|
||||
"""
|
||||
doc = Document()
|
||||
|
||||
# Set margins via section properties
|
||||
def _setup_docx_style(doc):
|
||||
section = doc.sections[0]
|
||||
section.left_margin = Inches(1.5)
|
||||
section.right_margin = Inches(1.0)
|
||||
section.top_margin = Inches(1.0)
|
||||
section.bottom_margin = Inches(1.0)
|
||||
|
||||
# Enable continuous line numbering on the left
|
||||
sectPr = section._sectPr
|
||||
lnNumType = sectPr.find(qn("w:lnNumType"))
|
||||
if lnNumType is None:
|
||||
@@ -311,85 +299,82 @@ def create_transcript_docx(text: str, filename: str):
|
||||
lnNumType.set(qn("w:start"), "continuous")
|
||||
lnNumType.set(qn("w:countBy"), "1")
|
||||
|
||||
# Default font
|
||||
style = doc.styles["Normal"]
|
||||
font = style.font
|
||||
font.name = "Courier"
|
||||
font.size = Pt(12)
|
||||
|
||||
# Parse lines
|
||||
|
||||
def _add_cover_page(doc, doc_type, date, description):
    """Append a cover page to ``doc`` and finish it with a page break.

    Layout, top to bottom: a bold ``doc_type`` line, a ``date`` line, three
    empty spacer paragraphs, then a ``description`` line. Every text run is
    set explicitly to 12pt Courier.
    """

    def _courier_paragraph(content, emphasize=False):
        # One new paragraph holding a single 12pt Courier run.
        run = doc.add_paragraph().add_run(content)
        if emphasize:
            run.bold = True
        run.font.name = "Courier"
        run.font.size = Pt(12)

    _courier_paragraph(doc_type, emphasize=True)
    _courier_paragraph(date)

    # Vertical gap between the header lines and the description.
    for _ in range(3):
        doc.add_paragraph()

    _courier_paragraph(description)

    # Whatever is appended next starts on a fresh page.
    doc.add_page_break()
|
||||
|
||||
|
||||
def _add_transcript_content(doc, text):
|
||||
lines = text.strip().split("\n")
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Try to parse: [00:00] SPEAKER: text
|
||||
m = re.match(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)", line)
|
||||
if m:
|
||||
ts, speaker, content = m.groups()
|
||||
# Single paragraph: [timestamp] SPEAKER NAME underlined, then text inline
|
||||
p = doc.add_paragraph()
|
||||
p.paragraph_format.left_indent = Inches(0.25)
|
||||
|
||||
# Timestamp + speaker name (underline only, not bold)
|
||||
run_label = p.add_run(f"[{ts}] {speaker.upper()}:")
|
||||
run_label.bold = False
|
||||
run_label.underline = True
|
||||
run_label.font.name = "Courier"
|
||||
run_label.font.size = Pt(12)
|
||||
|
||||
# Space after colon (no underline)
|
||||
run_space = p.add_run(" ")
|
||||
run_space.bold = False
|
||||
run_space.underline = False
|
||||
run_space.font.name = "Courier"
|
||||
run_space.font.size = Pt(12)
|
||||
|
||||
# Spoken text (no underline, no bold)
|
||||
run_txt = p.add_run(content.strip())
|
||||
run_txt.bold = False
|
||||
run_txt.underline = False
|
||||
run_txt.font.name = "Courier"
|
||||
run_txt.font.size = Pt(12)
|
||||
else:
|
||||
# Fallback for non-standard lines
|
||||
p = doc.add_paragraph()
|
||||
run = p.add_run(line)
|
||||
run.font.name = "Courier"
|
||||
run.font.size = Pt(12)
|
||||
|
||||
doc.save(filename)
|
||||
|
||||
|
||||
def create_summary_docx(text: str, filename: str):
|
||||
"""
|
||||
Create a .docx summary with consistent font.
|
||||
Translates markdown headings into WYSIWYG formatting:
|
||||
- First heading: bold
|
||||
- Second heading: italic
|
||||
- Third heading: underline
|
||||
- Fourth heading: italic + underline
|
||||
No section headings; use bold/underline only.
|
||||
"""
|
||||
doc = Document()
|
||||
style = doc.styles["Normal"]
|
||||
font = style.font
|
||||
font.name = "Courier"
|
||||
font.size = Pt(12)
|
||||
|
||||
heading_count = 0 # track headings in order
|
||||
|
||||
def _add_summary_content(doc, text):
|
||||
heading_count = 0
|
||||
for line in text.splitlines():
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
|
||||
# Detect markdown-style headings: #, ##, ###, ####
|
||||
m = re.match(r"^(#{1,4})\s+(.*)", stripped)
|
||||
if m:
|
||||
heading_count += 1
|
||||
level = len(m.group(1)) # 1..4
|
||||
content = m.group(2).strip()
|
||||
|
||||
p = doc.add_paragraph()
|
||||
@@ -399,7 +384,6 @@ def create_summary_docx(text: str, filename: str):
|
||||
run.font.name = "Courier"
|
||||
run.font.size = Pt(12)
|
||||
|
||||
# Apply formatting based on this heading's ordinal position
|
||||
if heading_count == 1:
|
||||
run.bold = True
|
||||
elif heading_count == 2:
|
||||
@@ -410,8 +394,84 @@ def create_summary_docx(text: str, filename: str):
|
||||
run.italic = True
|
||||
run.underline = True
|
||||
else:
|
||||
# Normal text line
|
||||
p = doc.add_paragraph(stripped)
|
||||
p.paragraph_format.space_after = Pt(4)
|
||||
|
||||
|
||||
def create_transcript_docx(
    text: str,
    filename: str,
    include_cover: bool = False,
    cover_date: str = "",
    cover_desc: str = "",
):
    """
    Build a transcript .docx from ``text`` and save it to ``filename``.

    Page setup is delegated to ``_setup_docx_style`` and the body to
    ``_add_transcript_content``.

    A cover page titled "TRANSCRIPT" is prepended only when ``include_cover``
    is set AND both ``cover_date`` and ``cover_desc`` are non-empty; if either
    string is empty the cover is skipped.
    """
    document = Document()
    _setup_docx_style(document)

    # All three must be truthy for the cover to be emitted.
    wants_cover = all((include_cover, cover_date, cover_desc))
    if wants_cover:
        _add_cover_page(document, "TRANSCRIPT", cover_date, cover_desc)

    _add_transcript_content(document, text)
    document.save(filename)
|
||||
|
||||
|
||||
def create_summary_docx(
    text: str,
    filename: str,
    include_cover: bool = False,
    cover_date: str = "",
    cover_desc: str = "",
):
    """
    Build a summary .docx from ``text`` and save it to ``filename``.

    Page setup is delegated to ``_setup_docx_style`` and the body to
    ``_add_summary_content``.

    A cover page titled "SUMMARY" is prepended only when ``include_cover``
    is set AND both ``cover_date`` and ``cover_desc`` are non-empty; if either
    string is empty the cover is skipped.
    """
    document = Document()
    _setup_docx_style(document)

    # All three must be truthy for the cover to be emitted.
    wants_cover = all((include_cover, cover_date, cover_desc))
    if wants_cover:
        _add_cover_page(document, "SUMMARY", cover_date, cover_desc)

    _add_summary_content(document, text)
    document.save(filename)
|
||||
|
||||
|
||||
def create_combined_docx(
    transcript_text: str,
    summary_text: str,
    filename: str,
    transcript_cover_date: str,
    transcript_cover_desc: str,
    summary_cover_date: str,
    summary_cover_desc: str,
):
    """
    Build one .docx holding the transcript followed by the summary.

    Order of content written to the document:
    - "TRANSCRIPT" cover page, then the transcript body
    - a page break
    - "SUMMARY" cover page, then the summary body

    Both cover pages are always added here, whatever the cover arguments
    contain. The result is saved to ``filename``.
    """
    document = Document()
    _setup_docx_style(document)

    # (cover title, cover date, cover description, body writer, body text)
    parts = (
        (
            "TRANSCRIPT",
            transcript_cover_date,
            transcript_cover_desc,
            _add_transcript_content,
            transcript_text,
        ),
        (
            "SUMMARY",
            summary_cover_date,
            summary_cover_desc,
            _add_summary_content,
            summary_text,
        ),
    )

    for position, (title, date, desc, write_body, body) in enumerate(parts):
        if position:
            # Separate the summary part from the end of the transcript body.
            document.add_page_break()
        _add_cover_page(document, title, date, desc)
        write_body(document, body)

    document.save(filename)
|
||||
|
||||
Reference in New Issue
Block a user