Improve summary prompt, add markdown-to-DOCX styling, and add cover pages

- Configurable summary prompts via ENV or file; stronger default prompt.
- New docx_styles.py: converts markdown (headings, bullets, bold/italic) to DOCX.
- Updated create_summary_docx to use markdown-aware styling.
- New docx_cover.py: reusable cover page for transcript and summary.
- Cover pages enabled when COVER_PAGE_ENABLED=true.
2026-06-19 17:16:46 +00:00
parent 54414def26
commit 7a31be9de5
4 changed files with 369 additions and 37 deletions
@@ -148,19 +148,76 @@ class SummarizerClient:
            start = break_pos
        return chunks

+    def _load_summary_prompt(self, role: str) -> str:
+        """
+        Load summary prompt for the given role: 'chunk' or 'combined'.
+
+        Priority:
+        1) SUMMARY_PROMPT_{ROLE} (env)
+        2) SUMMARY_PROMPT_FILE (env) with [chunk] / [combined] sections
+        3) Built-in default prompt
+        """
+        role_upper = role.upper()
+
+        # 1) Direct env var: SUMMARY_PROMPT_CHUNK / SUMMARY_PROMPT_COMBINED
+        env_key = f"SUMMARY_PROMPT_{role_upper}"
+        env_prompt = (os.getenv(env_key) or "").strip()
+        if env_prompt:
+            return env_prompt
+
+        # 2) File-based prompt with sections
+        prompt_file = (os.getenv("SUMMARY_PROMPT_FILE") or "").strip()
+        if prompt_file and os.path.exists(prompt_file):
+            try:
+                with open(prompt_file, "r", encoding="utf-8") as f:
+                    content = f.read()
+                # Simple section parser: [chunk], [combined]
+                import re
+                pattern = re.compile(
+                    r"\[" + role + r"\]\s*\n(.*?)(?=\n\[|$)",
+                    re.DOTALL,
+                )
+                m = pattern.search(content)
+                if m:
+                    text = m.group(1).strip()
+                    if text:
+                        return text
+            except Exception as e:
+                logger.warning("Failed to load SUMMARY_PROMPT_FILE for %s: %s", role, e)
+
+        # 3) Default prompts
+        if role == "chunk":
+            return (
+                "You are an expert legal and business meeting summarizer. "
+                "You will receive a segment of a longer transcript. "
+                "Provide a detailed, structured summary of this segment, focusing on: "
+                "- Topics discussed\n"
+                "- Key points and arguments\n"
+                "- Decisions and agreements\n"
+                "- Action items and responsibilities\n"
+                "- Any risks, conflicts, or open issues\n\n"
+                "Be concise but complete. Use bullet points where helpful. "
+                "Do not add information that is not present in the transcript."
+            )
+        else:
+            return (
+                "You are an expert legal and business meeting summarizer. "
+                "You will receive several intermediate summaries of a longer conversation. "
+                "Produce a single, comprehensive summary that makes it clear: "
+                "- The overall purpose and context of the discussion\n"
+                "- The main issues and topics addressed\n"
+                "- Key arguments and positions (briefly)\n"
+                "- Decisions and outcomes\n"
+                "- Action items, responsibilities, and next steps\n"
+                "- Any unresolved issues or risks\n\n"
+                "The summary should be detailed enough that a reader who was not present "
+                "can understand what happened and what is expected going forward. "
+                "Use clear, concise language and bullet points where appropriate. "
+                "Use markdown formatting (headings, lists, bold) to structure the summary."
+            )
+
    def _summarize_chunk(self, chunk: str, index: int, total: int) -> str:
-        system_prompt = (
-            "You are an expert legal and business meeting summarizer. "
-            "You will receive a segment of a longer transcript. "
-            "Provide a detailed, structured summary of this segment, focusing on: "
-            "- Topics discussed\n"
-            "- Key points and arguments\n"
-            "- Decisions and agreements\n"
-            "- Action items and responsibilities\n"
-            "- Any risks, conflicts, or open issues\n\n"
-            "Be concise but complete. Use bullet points when helpful. "
-            "Do not add information that is not present in the transcript."
-        )
+        system_prompt = self._load_summary_prompt("chunk")

        user_prompt = (
            f"This is segment {index + 1} of {total} from a longer conversation.\n\n"
@@ -170,20 +227,7 @@ class SummarizerClient:
        return self._chat_completion(system_prompt, user_prompt)

    def _summarize_combined(self, combined_summaries: str) -> str:
-        system_prompt = (
-            "You are an expert legal and business meeting summarizer. "
-            "You will receive several intermediate summaries of a longer conversation. "
-            "Produce a single, comprehensive summary that makes it clear: "
-            "- The overall purpose and context of the discussion\n"
-            "- The main issues and topics addressed\n"
-            "- Key arguments and positions (briefly)\n"
-            "- Decisions and outcomes\n"
-            "- Action items, responsibilities, and next steps\n"
-            "- Any unresolved issues or risks\n\n"
-            "The summary should be detailed enough that a reader who was not present "
-            "can understand what happened and what is expected going forward. "
-            "Use clear, concise language and bullet points where appropriate."
-        )
+        system_prompt = self._load_summary_prompt("combined")

        user_prompt = (
            "Here are the intermediate summaries from different parts of the same conversation:\n\n"