Improve summary prompt, add markdown-to-DOCX styling, and add cover pages
- Configurable summary prompts via ENV or file; stronger default prompt.
- New docx_styles.py: converts markdown (headings, bullets, bold/italic) to DOCX.
- Updated create_summary_docx to use markdown-aware styling.
- New docx_cover.py: reusable cover page for transcript and summary.
- Cover pages enabled when COVER_PAGE_ENABLED=true.
This commit is contained in:
+70
-26
@@ -148,19 +148,76 @@ class SummarizerClient:
|
||||
start = break_pos
|
||||
return chunks
|
||||
|
||||
def _load_summary_prompt(self, role: str) -> str:
    """
    Load the system prompt for the given summarization role.

    Priority:
    1) SUMMARY_PROMPT_{ROLE} (env), e.g. SUMMARY_PROMPT_CHUNK
    2) SUMMARY_PROMPT_FILE (env) pointing at a text file with
       [chunk] / [combined] sections
    3) Built-in default prompt

    In the prompt file a section starts at a line that contains only
    ``[<role>]`` and runs until the next line that contains only a
    ``[name]`` header, or the end of the file. Body lines that merely
    begin with ``[`` (for example ``[Speaker] said ...``) are part of
    the prompt and do not end the section.

    Args:
        role: 'chunk' or 'combined'. Any other value is looked up in the
            environment and the file the same way, and falls back to the
            built-in 'combined' prompt.

    Returns:
        The prompt text, stripped of surrounding whitespace when it comes
        from the environment or the file.
    """
    # Function-scope import so the method does not depend on the
    # module's top-level imports.
    import re

    # 1) Direct env var: SUMMARY_PROMPT_CHUNK / SUMMARY_PROMPT_COMBINED
    env_prompt = (os.getenv(f"SUMMARY_PROMPT_{role.upper()}") or "").strip()
    if env_prompt:
        return env_prompt

    # 2) File-based prompt with sections
    prompt_file = (os.getenv("SUMMARY_PROMPT_FILE") or "").strip()
    if prompt_file:
        try:
            with open(prompt_file, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: a missing or unreadable file must not stop
            # summarization, but the misconfiguration should be visible.
            logger.warning("Failed to load SUMMARY_PROMPT_FILE for %s: %s", role, e)
        else:
            # The header must be alone on its line, and only another
            # header line (or end of file) terminates the section. The
            # role is escaped so it is always matched literally.
            section = re.search(
                r"^[ \t]*\[" + re.escape(role) + r"\][ \t]*\n"
                r"(.*?)"
                r"(?=^[ \t]*\[[^\[\]\r\n]+\][ \t]*$|\Z)",
                content,
                re.DOTALL | re.MULTILINE,
            )
            if section:
                text = section.group(1).strip()
                if text:
                    return text

    # 3) Default prompts
    if role == "chunk":
        return (
            "You are an expert legal and business meeting summarizer. "
            "You will receive a segment of a longer transcript. "
            "Provide a detailed, structured summary of this segment, focusing on: "
            "- Topics discussed\n"
            "- Key points and arguments\n"
            "- Decisions and agreements\n"
            "- Action items and responsibilities\n"
            "- Any risks, conflicts, or open issues\n\n"
            "Be concise but complete. Use bullet points where helpful. "
            "Do not add information that is not present in the transcript."
        )

    return (
        "You are an expert legal and business meeting summarizer. "
        "You will receive several intermediate summaries of a longer conversation. "
        "Produce a single, comprehensive summary that makes it clear: "
        "- The overall purpose and context of the discussion\n"
        "- The main issues and topics addressed\n"
        "- Key arguments and positions (briefly)\n"
        "- Decisions and outcomes\n"
        "- Action items, responsibilities, and next steps\n"
        "- Any unresolved issues or risks\n\n"
        "The summary should be detailed enough that a reader who was not present "
        "can understand what happened and what is expected going forward. "
        "Use clear, concise language and bullet points where appropriate. "
        "Use markdown formatting (headings, lists, bold) to structure the summary."
    )
|
||||
|
||||
def _summarize_chunk(self, chunk: str, index: int, total: int) -> str:
|
||||
system_prompt = (
|
||||
"You are an expert legal and business meeting summarizer. "
|
||||
"You will receive a segment of a longer transcript. "
|
||||
"Provide a detailed, structured summary of this segment, focusing on: "
|
||||
"- Topics discussed\n"
|
||||
"- Key points and arguments\n"
|
||||
"- Decisions and agreements\n"
|
||||
"- Action items and responsibilities\n"
|
||||
"- Any risks, conflicts, or open issues\n\n"
|
||||
"Be concise but complete. Use bullet points when helpful. "
|
||||
"Do not add information that is not present in the transcript."
|
||||
)
|
||||
system_prompt = self._load_summary_prompt("chunk")
|
||||
|
||||
user_prompt = (
|
||||
f"This is segment {index + 1} of {total} from a longer conversation.\n\n"
|
||||
@@ -170,20 +227,7 @@ class SummarizerClient:
|
||||
return self._chat_completion(system_prompt, user_prompt)
|
||||
|
||||
def _summarize_combined(self, combined_summaries: str) -> str:
|
||||
system_prompt = (
|
||||
"You are an expert legal and business meeting summarizer. "
|
||||
"You will receive several intermediate summaries of a longer conversation. "
|
||||
"Produce a single, comprehensive summary that makes it clear: "
|
||||
"- The overall purpose and context of the discussion\n"
|
||||
"- The main issues and topics addressed\n"
|
||||
"- Key arguments and positions (briefly)\n"
|
||||
"- Decisions and outcomes\n"
|
||||
"- Action items, responsibilities, and next steps\n"
|
||||
"- Any unresolved issues or risks\n\n"
|
||||
"The summary should be detailed enough that a reader who was not present "
|
||||
"can understand what happened and what is expected going forward. "
|
||||
"Use clear, concise language and bullet points where appropriate."
|
||||
)
|
||||
system_prompt = self._load_summary_prompt("combined")
|
||||
|
||||
user_prompt = (
|
||||
"Here are the intermediate summaries from different parts of the same conversation:\n\n"
|
||||
|
||||
Reference in New Issue
Block a user