Add cover pages to transcript/summary .docx with AI-generated descriptions; include combined .docx when both requested
This commit is contained in:
+105
-45
@@ -284,25 +284,13 @@ def send_email(
|
|||||||
raise EmailError(f"Failed to send email: {e}")
|
raise EmailError(f"Failed to send email: {e}")
|
||||||
|
|
||||||
|
|
||||||
def create_transcript_docx(text: str, filename: str):
|
def _setup_docx_style(doc):
|
||||||
"""
|
|
||||||
Create a .docx transcript with:
|
|
||||||
- 1.5" left margin, 1" right margin
|
|
||||||
- 12pt Courier
|
|
||||||
- Continuous line numbering on the left
|
|
||||||
- Speaker names capitalized and indented; spoken text further indented
|
|
||||||
- No section headings; use bold/underline only.
|
|
||||||
"""
|
|
||||||
doc = Document()
|
|
||||||
|
|
||||||
# Set margins via section properties
|
|
||||||
section = doc.sections[0]
|
section = doc.sections[0]
|
||||||
section.left_margin = Inches(1.5)
|
section.left_margin = Inches(1.5)
|
||||||
section.right_margin = Inches(1.0)
|
section.right_margin = Inches(1.0)
|
||||||
section.top_margin = Inches(1.0)
|
section.top_margin = Inches(1.0)
|
||||||
section.bottom_margin = Inches(1.0)
|
section.bottom_margin = Inches(1.0)
|
||||||
|
|
||||||
# Enable continuous line numbering on the left
|
|
||||||
sectPr = section._sectPr
|
sectPr = section._sectPr
|
||||||
lnNumType = sectPr.find(qn("w:lnNumType"))
|
lnNumType = sectPr.find(qn("w:lnNumType"))
|
||||||
if lnNumType is None:
|
if lnNumType is None:
|
||||||
@@ -311,85 +299,82 @@ def create_transcript_docx(text: str, filename: str):
|
|||||||
lnNumType.set(qn("w:start"), "continuous")
|
lnNumType.set(qn("w:start"), "continuous")
|
||||||
lnNumType.set(qn("w:countBy"), "1")
|
lnNumType.set(qn("w:countBy"), "1")
|
||||||
|
|
||||||
# Default font
|
|
||||||
style = doc.styles["Normal"]
|
style = doc.styles["Normal"]
|
||||||
font = style.font
|
font = style.font
|
||||||
font.name = "Courier"
|
font.name = "Courier"
|
||||||
font.size = Pt(12)
|
font.size = Pt(12)
|
||||||
|
|
||||||
# Parse lines
|
|
||||||
|
def _add_cover_page(doc, doc_type, date, description):
    """Append a cover page to ``doc`` and finish it with a page break.

    The page consists of, in order: the document type in bold, the date,
    three empty paragraphs used as vertical spacing, and the description.
    Every run is explicitly set to 12pt Courier.

    Args:
        doc: The document object being built; it is mutated in place.
        doc_type: Text for the first (bold) line, e.g. "TRANSCRIPT".
        date: Text for the date line.
        description: Text for the description line.
    """

    def _courier_line(content, emphasize=False):
        # One new paragraph holding a single 12pt Courier run.
        styled = doc.add_paragraph().add_run(content)
        if emphasize:
            # Bold is only ever switched on; other lines leave it unset.
            styled.bold = True
        styled.font.name = "Courier"
        styled.font.size = Pt(12)

    _courier_line(doc_type, emphasize=True)
    _courier_line(date)

    # Three empty paragraphs separate the header lines from the description.
    for _spacer in range(3):
        doc.add_paragraph()

    _courier_line(description)

    # Whatever is added next starts on a fresh page.
    doc.add_page_break()
|
||||||
|
|
||||||
|
|
||||||
|
def _add_transcript_content(doc, text):
    """Append the transcript body to ``doc``, one paragraph per non-empty line.

    Lines shaped like ``[MM:SS] Speaker: words`` (an ``HH:MM:SS`` stamp is
    also accepted) become a paragraph indented by 0.25" made of three runs:
    the underlined ``[timestamp] SPEAKER:`` label with the speaker
    upper-cased, a single plain space, and the spoken text. Any other
    non-empty line is written unchanged as a single run. All runs are
    12pt Courier.

    Args:
        doc: The document object being built; it is mutated in place.
        text: Transcript text with entries separated by newlines.
    """

    def _emit(paragraph, content, underlined=None):
        # ``underlined=None`` means bold/underline are left untouched
        # (fallback lines); otherwise bold is forced off and underline is
        # set to the given value.
        piece = paragraph.add_run(content)
        if underlined is not None:
            piece.bold = False
            piece.underline = underlined
        piece.font.name = "Courier"
        piece.font.size = Pt(12)

    for raw in text.strip().split("\n"):
        entry = raw.strip()
        if not entry:
            continue

        parsed = re.match(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)", entry)
        if parsed is None:
            # Line does not follow the "[time] speaker: text" shape.
            _emit(doc.add_paragraph(), entry)
            continue

        stamp, who, said = parsed.groups()
        para = doc.add_paragraph()
        para.paragraph_format.left_indent = Inches(0.25)

        _emit(para, f"[{stamp}] {who.upper()}:", underlined=True)
        # The separating space is its own run so it is not underlined.
        _emit(para, " ", underlined=False)
        _emit(para, said.strip(), underlined=False)
|
||||||
|
|
||||||
doc.save(filename)
|
|
||||||
|
|
||||||
|
|
||||||
def create_summary_docx(text: str, filename: str):
|
|
||||||
"""
|
|
||||||
Create a .docx summary with consistent font.
|
|
||||||
Translates markdown headings into WYSIWYG formatting:
|
|
||||||
- First heading: bold
|
|
||||||
- Second heading: italic
|
|
||||||
- Third heading: underline
|
|
||||||
- Fourth heading: italic + underline
|
|
||||||
No section headings; use bold/underline only.
|
|
||||||
"""
|
|
||||||
doc = Document()
|
|
||||||
style = doc.styles["Normal"]
|
|
||||||
font = style.font
|
|
||||||
font.name = "Courier"
|
|
||||||
font.size = Pt(12)
|
|
||||||
|
|
||||||
heading_count = 0 # track headings in order
|
|
||||||
|
|
||||||
|
def _add_summary_content(doc, text):
|
||||||
|
heading_count = 0
|
||||||
for line in text.splitlines():
|
for line in text.splitlines():
|
||||||
stripped = line.strip()
|
stripped = line.strip()
|
||||||
if not stripped:
|
if not stripped:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Detect markdown-style headings: #, ##, ###, ####
|
|
||||||
m = re.match(r"^(#{1,4})\s+(.*)", stripped)
|
m = re.match(r"^(#{1,4})\s+(.*)", stripped)
|
||||||
if m:
|
if m:
|
||||||
heading_count += 1
|
heading_count += 1
|
||||||
level = len(m.group(1)) # 1..4
|
|
||||||
content = m.group(2).strip()
|
content = m.group(2).strip()
|
||||||
|
|
||||||
p = doc.add_paragraph()
|
p = doc.add_paragraph()
|
||||||
@@ -399,7 +384,6 @@ def create_summary_docx(text: str, filename: str):
|
|||||||
run.font.name = "Courier"
|
run.font.name = "Courier"
|
||||||
run.font.size = Pt(12)
|
run.font.size = Pt(12)
|
||||||
|
|
||||||
# Apply formatting based on this heading's ordinal position
|
|
||||||
if heading_count == 1:
|
if heading_count == 1:
|
||||||
run.bold = True
|
run.bold = True
|
||||||
elif heading_count == 2:
|
elif heading_count == 2:
|
||||||
@@ -410,8 +394,84 @@ def create_summary_docx(text: str, filename: str):
|
|||||||
run.italic = True
|
run.italic = True
|
||||||
run.underline = True
|
run.underline = True
|
||||||
else:
|
else:
|
||||||
# Normal text line
|
|
||||||
p = doc.add_paragraph(stripped)
|
p = doc.add_paragraph(stripped)
|
||||||
p.paragraph_format.space_after = Pt(4)
|
p.paragraph_format.space_after = Pt(4)
|
||||||
|
|
||||||
|
|
||||||
|
def create_transcript_docx(
    text: str,
    filename: str,
    include_cover: bool = False,
    cover_date: str = "",
    cover_desc: str = "",
):
    """Build a transcript .docx and save it to ``filename``.

    Page setup comes from ``_setup_docx_style`` and the body from
    ``_add_transcript_content``. A cover page labelled "TRANSCRIPT" is
    placed first only when ``include_cover`` is true and both
    ``cover_date`` and ``cover_desc`` are non-empty.

    Args:
        text: Transcript text to render.
        filename: Destination path of the .docx file.
        include_cover: Whether a cover page is requested.
        cover_date: Date string shown on the cover page.
        cover_desc: Description shown on the cover page.
    """
    document = Document()
    _setup_docx_style(document)

    # A cover page with a missing date or description is skipped entirely.
    cover_ready = all((include_cover, cover_date, cover_desc))
    if cover_ready:
        _add_cover_page(document, "TRANSCRIPT", cover_date, cover_desc)

    _add_transcript_content(document, text)
    document.save(filename)
|
||||||
|
|
||||||
|
|
||||||
|
def create_summary_docx(
    text: str,
    filename: str,
    include_cover: bool = False,
    cover_date: str = "",
    cover_desc: str = "",
):
    """Build a summary .docx and save it to ``filename``.

    Page setup comes from ``_setup_docx_style`` and the body from
    ``_add_summary_content``. A cover page labelled "SUMMARY" is placed
    first only when ``include_cover`` is true and both ``cover_date`` and
    ``cover_desc`` are non-empty.

    Args:
        text: Summary text to render.
        filename: Destination path of the .docx file.
        include_cover: Whether a cover page is requested.
        cover_date: Date string shown on the cover page.
        cover_desc: Description shown on the cover page.
    """
    document = Document()
    _setup_docx_style(document)

    # A cover page with a missing date or description is skipped entirely.
    cover_ready = all((include_cover, cover_date, cover_desc))
    if cover_ready:
        _add_cover_page(document, "SUMMARY", cover_date, cover_desc)

    _add_summary_content(document, text)
    document.save(filename)
|
||||||
|
|
||||||
|
|
||||||
|
def create_combined_docx(
    transcript_text: str,
    summary_text: str,
    filename: str,
    transcript_cover_date: str,
    transcript_cover_desc: str,
    summary_cover_date: str,
    summary_cover_desc: str,
):
    """Build one .docx holding the transcript followed by the summary.

    Resulting order: transcript cover page, transcript content, page break,
    summary cover page, summary content. Unlike the standalone builders,
    both cover pages are always written, even when a date or description
    is empty.

    Args:
        transcript_text: Transcript text to render.
        summary_text: Summary text to render.
        filename: Destination path of the .docx file.
        transcript_cover_date: Date string for the transcript cover page.
        transcript_cover_desc: Description for the transcript cover page.
        summary_cover_date: Date string for the summary cover page.
        summary_cover_desc: Description for the summary cover page.
    """
    document = Document()
    _setup_docx_style(document)

    # (cover label, cover date, cover description, body renderer, body text)
    parts = (
        (
            "TRANSCRIPT",
            transcript_cover_date,
            transcript_cover_desc,
            _add_transcript_content,
            transcript_text,
        ),
        (
            "SUMMARY",
            summary_cover_date,
            summary_cover_desc,
            _add_summary_content,
            summary_text,
        ),
    )

    for position, (label, when, blurb, render, body) in enumerate(parts):
        if position:
            # Each later part starts on a new page after the previous body.
            document.add_page_break()
        _add_cover_page(document, label, when, blurb)
        render(document, body)

    document.save(filename)
|
||||||
|
|||||||
+89
-3
@@ -14,6 +14,7 @@ from .summarizer import SummarizerClient, SummarizerError
|
|||||||
from .misc import setup_logging
|
from .misc import setup_logging
|
||||||
from .email_sender import send_email, EmailError, load_template
|
from .email_sender import send_email, EmailError, load_template
|
||||||
from .email_sender import create_transcript_docx, create_summary_docx
|
from .email_sender import create_transcript_docx, create_summary_docx
|
||||||
|
from .email_sender import create_combined_docx
|
||||||
|
|
||||||
logger = logging.getLogger("scraibe.tasks")
|
logger = logging.getLogger("scraibe.tasks")
|
||||||
|
|
||||||
@@ -405,6 +406,58 @@ def process_transcription_task(
|
|||||||
)
|
)
|
||||||
speaker_map = {}
|
speaker_map = {}
|
||||||
|
|
||||||
|
# 3c) Generate short cover-page descriptions using summarizer
|
||||||
|
transcript_cover_desc = ""
|
||||||
|
summary_cover_desc = ""
|
||||||
|
today_str = datetime.utcnow().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
try:
|
||||||
|
scraibe._ensure_summarizer()
|
||||||
|
summarizer = scraibe._summarizer
|
||||||
|
|
||||||
|
# Transcript description
|
||||||
|
prompt = (
|
||||||
|
"In one short sentence (max 25 words), describe the content of this transcript "
|
||||||
|
"for use on a cover page. Do not include speaker names. Output only the sentence.\n\n"
|
||||||
|
+ transcript_text
|
||||||
|
)
|
||||||
|
response = summarizer._chat_completion(
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
temperature=0.3,
|
||||||
|
max_tokens=60,
|
||||||
|
)
|
||||||
|
transcript_cover_desc = (
|
||||||
|
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||||
|
).strip()
|
||||||
|
if not transcript_cover_desc:
|
||||||
|
transcript_cover_desc = "Transcript of the recorded conversation."
|
||||||
|
logger.info("Transcript cover description: %s", transcript_cover_desc)
|
||||||
|
|
||||||
|
# Summary description (if summary requested)
|
||||||
|
if include_summary:
|
||||||
|
prompt = (
|
||||||
|
"In one short sentence (max 25 words), describe the content of this summary "
|
||||||
|
"for use on a cover page. Output only the sentence.\n\n"
|
||||||
|
+ summary_text
|
||||||
|
)
|
||||||
|
response = summarizer._chat_completion(
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
temperature=0.3,
|
||||||
|
max_tokens=60,
|
||||||
|
)
|
||||||
|
summary_cover_desc = (
|
||||||
|
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||||
|
).strip()
|
||||||
|
if not summary_cover_desc:
|
||||||
|
summary_cover_desc = "Summary of the recorded conversation."
|
||||||
|
logger.info("Summary cover description: %s", summary_cover_desc)
|
||||||
|
|
||||||
|
except (SummarizerError, Exception) as e:
|
||||||
|
logger.warning("Failed to generate cover descriptions: %s", e)
|
||||||
|
transcript_cover_desc = transcript_cover_desc or "Transcript of the recorded conversation."
|
||||||
|
if include_summary:
|
||||||
|
summary_cover_desc = summary_cover_desc or "Summary of the recorded conversation."
|
||||||
|
|
||||||
# 4) Prepare files
|
# 4) Prepare files
|
||||||
|
|
||||||
# Transcript .md
|
# Transcript .md
|
||||||
@@ -414,9 +467,15 @@ def process_transcription_task(
|
|||||||
f.write(transcript_text)
|
f.write(transcript_text)
|
||||||
temp_files.append(md_transcript_path)
|
temp_files.append(md_transcript_path)
|
||||||
|
|
||||||
# Transcript .docx
|
# Transcript .docx (standalone, with cover page)
|
||||||
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
|
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
|
||||||
create_transcript_docx(transcript_text, docx_transcript_path)
|
create_transcript_docx(
|
||||||
|
transcript_text,
|
||||||
|
docx_transcript_path,
|
||||||
|
include_cover=True,
|
||||||
|
cover_date=today_str,
|
||||||
|
cover_desc=transcript_cover_desc,
|
||||||
|
)
|
||||||
temp_files.append(docx_transcript_path)
|
temp_files.append(docx_transcript_path)
|
||||||
|
|
||||||
# JSON as SOURCE
|
# JSON as SOURCE
|
||||||
@@ -440,17 +499,42 @@ def process_transcription_task(
|
|||||||
temp_files.append(json_path)
|
temp_files.append(json_path)
|
||||||
|
|
||||||
# Summary files (if present)
|
# Summary files (if present)
|
||||||
|
md_summary_path = None
|
||||||
|
docx_summary_path = None
|
||||||
|
docx_combined_path = None
|
||||||
|
|
||||||
if summary_text:
|
if summary_text:
|
||||||
|
# Summary .md
|
||||||
md_summary_path = _safe_filename("SUMMARY", local, date_tag, ".md")
|
md_summary_path = _safe_filename("SUMMARY", local, date_tag, ".md")
|
||||||
with open(md_summary_path, "w", encoding="utf-8") as f:
|
with open(md_summary_path, "w", encoding="utf-8") as f:
|
||||||
f.write("# Summary\n\n")
|
f.write("# Summary\n\n")
|
||||||
f.write(summary_text)
|
f.write(summary_text)
|
||||||
temp_files.append(md_summary_path)
|
temp_files.append(md_summary_path)
|
||||||
|
|
||||||
|
# Summary .docx (standalone, with cover page)
|
||||||
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
|
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
|
||||||
create_summary_docx(summary_text, docx_summary_path)
|
create_summary_docx(
|
||||||
|
summary_text,
|
||||||
|
docx_summary_path,
|
||||||
|
include_cover=True,
|
||||||
|
cover_date=today_str,
|
||||||
|
cover_desc=summary_cover_desc,
|
||||||
|
)
|
||||||
temp_files.append(docx_summary_path)
|
temp_files.append(docx_summary_path)
|
||||||
|
|
||||||
|
# Combined .docx: Transcript + Summary with cover pages
|
||||||
|
docx_combined_path = _safe_filename("TRANSCRIPT_AND_SUMMARY", local, date_tag, ".docx")
|
||||||
|
create_combined_docx(
|
||||||
|
transcript_text=transcript_text,
|
||||||
|
summary_text=summary_text,
|
||||||
|
filename=docx_combined_path,
|
||||||
|
transcript_cover_date=today_str,
|
||||||
|
transcript_cover_desc=transcript_cover_desc,
|
||||||
|
summary_cover_date=today_str,
|
||||||
|
summary_cover_desc=summary_cover_desc,
|
||||||
|
)
|
||||||
|
temp_files.append(docx_combined_path)
|
||||||
|
|
||||||
attachments = [
|
attachments = [
|
||||||
md_transcript_path,
|
md_transcript_path,
|
||||||
docx_transcript_path,
|
docx_transcript_path,
|
||||||
@@ -458,6 +542,8 @@ def process_transcription_task(
|
|||||||
]
|
]
|
||||||
if summary_text:
|
if summary_text:
|
||||||
attachments += [md_summary_path, docx_summary_path]
|
attachments += [md_summary_path, docx_summary_path]
|
||||||
|
if docx_combined_path:
|
||||||
|
attachments.append(docx_combined_path)
|
||||||
|
|
||||||
# 5) Send success email
|
# 5) Send success email
|
||||||
send_success_email(
|
send_success_email(
|
||||||
|
|||||||
Reference in New Issue
Block a user