Merge branch 'develop' of github.com:JSchmie/ScrAIbe into develop
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
# Builds the Sphinx documentation and, for pushes to the configured branch,
# publishes the generated HTML to the gh-pages branch.
name: documentation

on: [push, pull_request, workflow_dispatch]

# The deploy step pushes to gh-pages, which needs write access to contents.
permissions:
  contents: write

jobs:
  docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v3
        with:
          # Quoted so YAML keeps it a string (an unquoted 3.10 would be read as 3.1).
          python-version: "3.9"
      - name: Install dependencies
        run: |
          # Refresh the package index and install non-interactively.
          sudo apt-get update
          sudo apt-get install -y libsndfile1-dev
          pip install --upgrade pip
          pip install -r requirements.txt
          pip install --upgrade --force-reinstall torch==1.11.0+cpu torchvision==0.12.0+cpu torchaudio==0.11.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu
          pip install numpy==1.25
          pip install --upgrade sphinx sphinx_rtd_theme myst-parser
          # Quoted so the shell never treats [plugins] as a glob pattern.
          pip install --upgrade "markdown-it-py[plugins]"
          pip install --upgrade mdit-py-plugins

      - name: Sphinx build
        run: |
          # Copy the files referenced from the docs into the Sphinx source directory.
          cp README.md ./source/README.md
          cp LICENSE ./source/LICENSE
          cp -r Pictures ./source/Pictures
          sphinx-apidoc -o source scraibe/
          # HTML output lands in ./docs/html, which is the directory published below.
          sphinx-build -M html source docs
          make html
      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/sphinx_action' }}
        with:
          publish_branch: gh-pages
          github_token: ${{ secrets.TOKEN_GH }}
          publish_dir: ./docs/html
          force_orphan: true
|
||||
@@ -0,0 +1,20 @@
|
||||
# Sphinx documentation makefile: every target is forwarded to
# `sphinx-build -M <target>`.
#

# All four settings can be overridden on the command line; the first two
# are also picked up from the environment.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     := source
BUILDDIR      := build

# Kept as the first rule so that a bare "make" behaves like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all rule: any target not defined above is handed to Sphinx through
# its "make mode" (-M). $(O) is a shortcut for passing extra $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
@@ -16,8 +16,8 @@ During post-diarization, each audio segment is processed by the OpenAI `Whisper`
|
||||
|
||||
The following graphic illustrates the whole pipeline:
|
||||
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
## Install `ScrAIbe` :
|
||||
|
||||
@@ -131,7 +131,7 @@ The default address is: http://127.0.0.1:7860 or http://0.0.0.0:7860
|
||||
After the app is running, you can upload your audio file and select the desired options.
|
||||
An example is shown below:
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
### Running a Docker container
|
||||
@@ -177,7 +177,7 @@ For further guidance, check: https://blog.roboflow.com/use-the-gpu-in-docker/
|
||||
|
||||
## Documentation
|
||||
|
||||
For further insights, check the [documentation page]().
|
||||
For further insights, check the [documentation page](https://jschmie.github.io/ScrAIbe/).
|
||||
|
||||
## Contributions
|
||||
|
||||
@@ -207,12 +207,12 @@ For queries contact [Jacob Schmieder](Jacob.Schmieder@dbfz.de)
|
||||
|
||||
## License
|
||||
|
||||
ScrAIbe is licensed under GNU General Public License.
|
||||
ScrAIbe is licensed under [GNU General Public License](LICENSE).
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
Special thanks go to the KIDA project and the BMEL (Bundesministerium für Ernährung und Landwirtschaft), especially to the AI Consultancy Team.
|
||||
|
||||
   
|
||||
   
|
||||
|
||||
   
|
||||
   
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
# List of known hallucinations - adapted from:
|
||||
# https://github.com/openai/whisper/discussions/928
|
||||
# Every entry must end with a comma: Python joins adjacent string literals,
# so a missing comma silently merges two phrases into one that never matches.
KNOWN_HALLUCINATIONS = [
    # en
    " www.mooji.org",
    # nl
    " Ondertitels ingediend door de Amara.org gemeenschap",
    " Ondertiteld door de Amara.org gemeenschap",
    " Ondertiteling door de Amara.org gemeenschap",
    # de
    " Untertitelung aufgrund der Amara.org-Community",
    " Untertitelung im Auftrag des ZDF für funk, 2016",
    " Untertitelung im Auftrag des ZDF f\u00fcr funk, 2016",
    " Untertitel im Auftrag des ZDF für funk, 2017",
    " Untertitel im Auftrag des ZDF f\u00fcr funk, 2017",
    " Untertitel im Auftrag des ZDF für funk, 2018",
    " Untertitel von Stephanie Geiges",
    " Untertitel der Amara.org-Community",
    " Untertitel im Auftrag des ZDF, 2017",
    " Untertitel im Auftrag des ZDF, 2018",
    " Untertitel im Auftrag des ZDF, 2019",
    " Untertitel im Auftrag des ZDF, 2020",
    " Untertitel im Auftrag des ZDF, 2021",
    " Untertitelung im Auftrag des ZDF, 2021",
    " Copyright WDR 2021",
    " Copyright WDR 2020",
    " Copyright WDR 2019",
    " SWR 2021",
    " SWR 2020",
    # fr
    " Sous-titres réalisés para la communauté d'Amara.org",
    " Sous-titres réalisés par la communauté d'Amara.org",
    " Sous-titres fait par Sous-titres par Amara.org",
    " Sous-titres réalisés par les SousTitres d'Amara.org",
    " Sous-titres par Amara.org",
    " Sous-titres par la communauté d'Amara.org",
    " Sous-titres réalisés pour la communauté d'Amara.org",
    " Sous-titres réalisés par la communauté de l'Amara.org",
    " Sous-Titres faits par la communauté d'Amara.org",
    " Sous-titres par l'Amara.org",
    " Sous-titres fait par la communauté d'Amara.org",
    " Sous-titrage ST' 501",
    " Sous-titrage ST'501",
    " Cliquez-vous sur les sous-titres et abonnez-vous à la chaîne d'Amara.org",
    " ❤️ par SousTitreur.com",
    # it
    " Sottotitoli creati dalla comunità Amara.org",
    " Sottotitoli di Sottotitoli di Amara.org",
    " Sottotitoli e revisione al canale di Amara.org",
    " Sottotitoli e revisione a cura di Amara.org",
    " Sottotitoli e revisione a cura di QTSS",
    " Sottotitoli e revisione a cura di QTSS.",
    " Sottotitoli a cura di QTSS",
    # es
    " Subtítulos realizados por la comunidad de Amara.org",
    " Subtitulado por la comunidad de Amara.org",
    " Subtítulos por la comunidad de Amara.org",
    " Subtítulos creados por la comunidad de Amara.org",
    " Subtítulos en español de Amara.org",
    " Subtítulos hechos por la comunidad de Amara.org",
    " Subtitulos por la comunidad de Amara.org",
    " Más información www.alimmenta.com",
    " www.mooji.org",
    # gl
    " Subtítulos realizados por la comunidad de Amara.org",
    # pt
    " Legendas pela comunidade Amara.org",
    " Legendas pela comunidade de Amara.org",
    " Legendas pela comunidade do Amara.org",
    " Legendas pela comunidade das Amara.org",
    " Transcrição e Legendas pela comunidade de Amara.org",
    # la
    " Sottotitoli creati dalla comunità Amara.org",
    " Sous-titres réalisés para la communauté d'Amara.org",
    # ln
    " Sous-titres réalisés para la communauté d'Amara.org",
    # pl
    " Napisy stworzone przez społeczność Amara.org",
    " Napisy wykonane przez społeczność Amara.org",
    " Zdjęcia i napisy stworzone przez społeczność Amara.org",
    " napisy stworzone przez społeczność Amara.org",
    " Tłumaczenie i napisy stworzone przez społeczność Amara.org",
    " Napisy stworzone przez społeczności Amara.org",
    " Tłumaczenie stworzone przez społeczność Amara.org",
    " Napisy robione przez społeczność Amara.org",
    " www.multi-moto.eu",
    # ru
    " Редактор субтитров А.Синецкая Корректор А.Егорова",
    # tr
    " Yorumlarınızıza abone olmayı unutmayın.",
    # su
    " Sottotitoli creati dalla comunità Amara.org",
    # zh
    "字幕由Amara.org社区提供",
    "小編字幕由Amara.org社區提供",
]
|
||||
@@ -2,10 +2,13 @@ import json
|
||||
import time
|
||||
|
||||
from typing import Union
|
||||
|
||||
|
||||
from .hallucinations import KNOWN_HALLUCINATIONS
|
||||
|
||||
ALPHABET = [*"abcdefghijklmnopqrstuvwxyz"]
|
||||
|
||||
|
||||
|
||||
class Transcript:
|
||||
"""
|
||||
Class for storing transcript data, including speaker information and text segments,
|
||||
@@ -23,6 +26,7 @@ class Transcript:
|
||||
"""
|
||||
|
||||
self.transcript = transcript
|
||||
self._remove_hallucinations()
|
||||
self.speakers = self._extract_speakers()
|
||||
self.segments = self._extract_segments()
|
||||
self.annotation = {}
|
||||
@@ -62,6 +66,20 @@ class Transcript:
|
||||
|
||||
return self
|
||||
|
||||
def _remove_hallucinations(self) -> None:
    """
    Strip every known hallucination phrase from each segment's text.

    Each entry of ``KNOWN_HALLUCINATIONS`` is removed from the ``'text'``
    field of every segment in ``self.transcript``. A segment whose text is
    exactly the empty string afterwards is deleted from the transcript.
    """
    emptied_keys = []
    for seg_key in self.transcript:
        segment = self.transcript[seg_key]
        for phrase in KNOWN_HALLUCINATIONS:
            segment['text'] = segment['text'].replace(phrase, '')
        if segment['text'] == '':
            emptied_keys.append(seg_key)

    # Delete in a second pass so the transcript is not mutated while iterating.
    for seg_key in emptied_keys:
        del self.transcript[seg_key]
|
||||
|
||||
def _extract_speakers(self) -> list:
|
||||
"""
|
||||
Extracts the unique speaker names from the transcript.
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
# Sphinx configuration for the ScrAIbe documentation build.
#
# Only a selection of the most common options is set here. The complete list
# is described at:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# Modules documented with autodoc must be importable, so the directory above
# the current one is put at the front of sys.path as an absolute path.
import os
import sys

sys.path.insert(0, os.path.abspath('../'))


# -- Project information -----------------------------------------------------

project = 'ScrAIbe: Streamlined Conversation Recording with Automated Intelligence Based Environment'
author = 'Jacob Schmieder'
copyright = '2023, Jacob Schmieder'

# Full version string, including any alpha/beta/rc tag.
release = '0.1.1'


# -- General configuration ---------------------------------------------------

# Sphinx extensions to load: the bundled 'sphinx.ext.*' modules plus the
# MyST parser.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
    'sphinx.ext.napoleon',
    'myst_parser',
]

# Napoleon (Google / NumPy docstring) settings.
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = True
napoleon_include_private_with_doc = True
napoleon_include_special_with_doc = True
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True

# Template directories, relative to this file.
templates_path = ['_templates']

# Patterns (relative to the source directory) for files and directories to
# ignore when collecting sources. They also apply to html_static_path and
# html_extra_path.
exclude_patterns = []

# Map each source file suffix to the parser that handles it.
source_suffix = {
    '.rst': 'restructuredtext',
    '.txt': 'markdown',
    '.md': 'markdown',
}


# -- Options for HTML output -------------------------------------------------

# Theme used for HTML and HTML Help pages.
html_theme = 'sphinx_rtd_theme'

# Directories with custom static files (such as style sheets), relative to
# this file. They are copied after the builtin static files, so a file named
# "default.css" overrides the builtin "default.css".
html_static_path = ['_static']
|
||||
@@ -0,0 +1,21 @@
|
||||
Welcome to ScrAIbe: Streamlined Conversation Recording with Automated Intelligence Based Environment's documentation!
|
||||
=====================================================================================================================
|
||||
|
||||
.. automodule:: scraibe
|
||||
:members:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
README.md
|
||||
|
||||
modules
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
@@ -0,0 +1,7 @@
|
||||
scraibe
|
||||
=======
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
scraibe
|
||||
Reference in New Issue
Block a user