From 7c8185e94bff2265be5a5b108bc33b4e5978bf03 Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Thu, 7 Dec 2023 13:04:51 +0100 Subject: [PATCH 1/9] Create mirror_to_gitlab.yml --- .github/workflows/mirror_to_gitlab.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/mirror_to_gitlab.yml diff --git a/.github/workflows/mirror_to_gitlab.yml b/.github/workflows/mirror_to_gitlab.yml new file mode 100644 index 0000000..88f1c8a --- /dev/null +++ b/.github/workflows/mirror_to_gitlab.yml @@ -0,0 +1,21 @@ +name: Mirror and run GitLab CI + +on: [push, delete] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Mirror + trigger CI + uses: SvanBoxel/gitlab-mirror-and-ci-action@master + with: + args: "https://git-dmz.thuenen.de/kida/i2-skills-beratungsstelle/scraibe" + env: + FOLLOW_TAGS: "false" + FORCE_PUSH: "false" + GITLAB_HOSTNAME: "git-dmz.thuenen.de" + GITLAB_USERNAME: ${{ secrets.GITLAB_USERNAME }} + GITLAB_PASSWORD: ${{ secrets.GITLAB_PASSWORD }} // Generate here: https://gitlab.com/profile/personal_access_tokens + GITLAB_PROJECT_ID: ${{ secrets.GITLAB_PROJECT_ID }} // https://gitlab.com///edit + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} // https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret From 034ef1a711ae160bc0fdf67a8592ee3e31fc66af Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Thu, 7 Dec 2023 13:29:39 +0100 Subject: [PATCH 2/9] Update mirror_to_gitlab.yml --- .github/workflows/mirror_to_gitlab.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mirror_to_gitlab.yml b/.github/workflows/mirror_to_gitlab.yml index 88f1c8a..52b8d86 100644 --- a/.github/workflows/mirror_to_gitlab.yml +++ b/.github/workflows/mirror_to_gitlab.yml @@ -18,4 +18,4 @@ jobs: GITLAB_USERNAME: ${{ secrets.GITLAB_USERNAME }} GITLAB_PASSWORD: ${{ secrets.GITLAB_PASSWORD }} // Generate here: https://gitlab.com/profile/personal_access_tokens GITLAB_PROJECT_ID: ${{ secrets.GITLAB_PROJECT_ID }} // https://gitlab.com///edit - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} // https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret + GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} // https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret From 942055eafca224f6fa9e565593f1ce9f6b1c76e9 Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Thu, 7 Dec 2023 13:31:40 +0100 Subject: [PATCH 3/9] Update mirror_to_gitlab.yml --- .github/workflows/mirror_to_gitlab.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mirror_to_gitlab.yml b/.github/workflows/mirror_to_gitlab.yml index 52b8d86..4c18d68 100644 --- a/.github/workflows/mirror_to_gitlab.yml +++ b/.github/workflows/mirror_to_gitlab.yml @@ -16,6 +16,6 @@ jobs: FORCE_PUSH: "false" GITLAB_HOSTNAME: "git-dmz.thuenen.de" GITLAB_USERNAME: ${{ secrets.GITLAB_USERNAME }} - GITLAB_PASSWORD: ${{ secrets.GITLAB_PASSWORD }} // Generate here: https://gitlab.com/profile/personal_access_tokens - GITLAB_PROJECT_ID: ${{ secrets.GITLAB_PROJECT_ID }} // https://gitlab.com///edit - GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} // https://docs.github.com/en/actions/reference/authentication-in-a-workflow#about-the-github_token-secret + GITLAB_PASSWORD: ${{ secrets.GITLAB_PASSWORD }} + GITLAB_PROJECT_ID: ${{ secrets.GITLAB_PROJECT_ID }} + GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} From 072168334d64698d8443249315fe7eb03a8add71 Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Thu, 7 Dec 2023 13:42:14 +0100 Subject: [PATCH 4/9] Update mirror_to_gitlab.yml --- .github/workflows/mirror_to_gitlab.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mirror_to_gitlab.yml b/.github/workflows/mirror_to_gitlab.yml index 4c18d68..eea0809 100644 --- a/.github/workflows/mirror_to_gitlab.yml +++ b/.github/workflows/mirror_to_gitlab.yml @@ -13,7 +13,7 @@ jobs: args: "https://git-dmz.thuenen.de/kida/i2-skills-beratungsstelle/scraibe" env: FOLLOW_TAGS: "false" - FORCE_PUSH: "false" + FORCE_PUSH: "true" GITLAB_HOSTNAME: "git-dmz.thuenen.de" GITLAB_USERNAME: ${{ secrets.GITLAB_USERNAME }} GITLAB_PASSWORD: ${{ secrets.GITLAB_PASSWORD }} From 32f0f070f965ed79cb849dec49ed04e96ac164c2 Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Thu, 7 Dec 2023 13:45:01 +0100 Subject: [PATCH 5/9] Update mirror_to_gitlab.yml --- .github/workflows/mirror_to_gitlab.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mirror_to_gitlab.yml b/.github/workflows/mirror_to_gitlab.yml index eea0809..a32712e 100644 --- a/.github/workflows/mirror_to_gitlab.yml +++ b/.github/workflows/mirror_to_gitlab.yml @@ -12,7 +12,7 @@ jobs: with: args: "https://git-dmz.thuenen.de/kida/i2-skills-beratungsstelle/scraibe" env: - FOLLOW_TAGS: "false" + FOLLOW_TAGS: "true" FORCE_PUSH: "true" GITLAB_HOSTNAME: "git-dmz.thuenen.de" GITLAB_USERNAME: ${{ secrets.GITLAB_USERNAME }} From 65ca71fc9171e90f7e4188c8c91e604d9de1dc2a Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Thu, 7 Dec 2023 13:50:09 +0100 Subject: [PATCH 6/9] Update mirror_to_gitlab.yml --- .github/workflows/mirror_to_gitlab.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mirror_to_gitlab.yml b/.github/workflows/mirror_to_gitlab.yml index a32712e..74eaca0 100644 --- a/.github/workflows/mirror_to_gitlab.yml +++ b/.github/workflows/mirror_to_gitlab.yml @@ -6,7 +6,9 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 + with: + - fetch-depth: 0 - name: Mirror + trigger CI uses: SvanBoxel/gitlab-mirror-and-ci-action@master with: From 2499dd1d17f13d06e3b36f80940dcbf34ad6d69e Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Thu, 7 Dec 2023 13:51:51 +0100 Subject: [PATCH 7/9] Update mirror_to_gitlab.yml --- .github/workflows/mirror_to_gitlab.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mirror_to_gitlab.yml b/.github/workflows/mirror_to_gitlab.yml index 74eaca0..b100359 100644 --- a/.github/workflows/mirror_to_gitlab.yml +++ b/.github/workflows/mirror_to_gitlab.yml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v3 with: - - fetch-depth: 0 + fetch-depth: 0 - name: Mirror + trigger CI uses: SvanBoxel/gitlab-mirror-and-ci-action@master with: From 9c0766fc41a3ede97fcc580a817db16ac7779f84 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 9 Feb 2024 11:35:38 +0100 Subject: [PATCH 8/9] updated dependencies now scraibe works with torch 2 --- scraibe/diarisation.py | 16 ++++++++++++---- scraibe/misc.py | 4 +++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/scraibe/diarisation.py b/scraibe/diarisation.py index f90bcdb..1a33817 100644 --- a/scraibe/diarisation.py +++ b/scraibe/diarisation.py @@ -34,6 +34,8 @@ from typing import TypeVar, Union from pyannote.audio import Pipeline from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization from torch import Tensor +from torch import device as torch_device +from torch.cuda import is_available, current_device from .misc import PYANNOTE_DEFAULT_PATH, PYANNOTE_DEFAULT_CONFIG Annotation = TypeVar('Annotation') @@ -184,6 +186,7 @@ class Diariser: cache_token: bool = True, cache_dir: Union[Path, str] = PYANNOTE_DEFAULT_PATH, hparams_file: Union[str, Path] = None, + device: str = None, *args, **kwargs ) -> Pipeline: @@ -198,6 +201,7 @@ class Diariser: cache_token: Whether to cache the token locally for future use. cache_dir: Directory for caching models. hparams_file: Path to a YAML file containing hyperparameters. + device: Device to load the model on. args: Additional arguments only to avoid errors. kwargs: Additional keyword arguments only to avoid errors. @@ -205,20 +209,24 @@ class Diariser: Pipeline: A pyannote.audio Pipeline object, encapsulating the loaded model. """ + if cache_token and use_auth_token is not None: cls._save_token(use_auth_token) if not os.path.exists(model) and use_auth_token is None: use_auth_token = cls._get_token() - model = 'pyannote/speaker-diarization' - elif not os.path.exists(model) and use_auth_token is not None: - model = 'pyannote/speaker-diarization' - + _model = Pipeline.from_pretrained(model, use_auth_token = use_auth_token, cache_dir = cache_dir, hparams_file = hparams_file,) + # try to move the model to the device + if device is None: + device = "cuda" if is_available() else "cpu" + + _model = _model.to(torch_device(device)) # torch_device is renamed from torch.device to avoid name conflict + if _model is None: raise ValueError('Unable to load model either from local cache' \ 'or from huggingface.co models. Please check your token' \ diff --git a/scraibe/misc.py b/scraibe/misc.py index b1afeea..c912478 100644 --- a/scraibe/misc.py +++ b/scraibe/misc.py @@ -12,7 +12,9 @@ if CACHE_DIR != PYANNOTE_CACHE_DIR: WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper") PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote") -PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml") +PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml") \ + if os.path.exists(os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")) \ + else 'pyannote/speaker-diarization-3.1' def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None: """Configure diarization pipeline from a YAML file. From df79a78a47bc1aabf3f92f9df9703f9b4261d212 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 9 Feb 2024 12:17:43 +0100 Subject: [PATCH 9/9] updated dependency list --- requirements.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index aed43e8..8cf1782 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,12 @@ -openai-whisper==20230314 +torch~=2.2.0 + +openai-whisper~=20231117 numpy~=1.23.5 -pyannote.audio~=2.1.1 -pyannote.core~=4.5 -pyannote.database~=4.1.3 +pyannote.audio~=3.1.1 +pyannote.core~=5.0.0 +pyannote.database~=5.0.1 pyannote.metrics~=3.2.1 -pyannote.pipeline~=2.3 +pyannote.pipeline~=3.0.1 setuptools~=65.6.3 setuptools-rust~=1.5.2