Edited README with the code snippets
Browse files
Signed-off-by: taejinp <[email protected]>
README.md
CHANGED
|
@@ -264,11 +264,8 @@ The model is available for use in the NeMo Framework[7], and can be used as a pr
|
|
| 264 |
from nemo.collections.asr.models import SortformerEncLabelModel, ASRModel
|
| 265 |
import torch
|
| 266 |
# A speaker diarization model is needed for tracking the speech activity of each speaker.
|
| 267 |
-
diar_model = SortformerEncLabelModel.from_pretrained("nvidia/diar_streaming_sortformer_4spk-v2.1")
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
asr_model = ASRModel.from_pretrained("nvidia/multitalker-parakeet-streaming-0.6b-v1.nemo")
|
| 271 |
-
asr_model.eval().to(torch.device("cuda"))
|
| 272 |
|
| 273 |
# Use the pre-defined dataclass template `MultitalkerTranscriptionConfig` from `multitalker_transcript_config.py`.
|
| 274 |
# Configure the diarization model using streaming parameters:
|
|
@@ -314,26 +311,28 @@ for step_num, (chunk_audio, chunk_lengths) in enumerate(streaming_buffer_iter):
|
|
| 314 |
)
|
| 315 |
|
| 316 |
# Generate the speaker-tagged transcript and print it.
|
| 317 |
-
|
| 318 |
-
print(seglst_dict_list)
|
| 319 |
```
|
| 320 |
|
| 321 |
### Method 2. Use NeMo example file in NVIDIA/NeMo
|
| 322 |
|
| 323 |
-
Use [
|
| 324 |
-
```
|
| 325 |
python ${NEMO_ROOT}/examples/asr/asr_cache_aware_streaming/speech_to_text_multitalker_streaming_infer.py \
|
| 326 |
-
asr_model=
|
| 327 |
-
diar_model=nvidia/diar_streaming_sortformer_4spk-v2 \
|
| 328 |
-
|
| 329 |
-
|
|
|
|
|
|
|
| 330 |
```
|
| 331 |
|
| 332 |
Or the `audio_file` argument can be replaced with the `manifest_file` to handle multiple files in batch mode:
|
| 333 |
-
```
|
| 334 |
python ${NEMO_ROOT}/examples/asr/asr_cache_aware_streaming/speech_to_text_multitalker_streaming_infer.py \
|
| 335 |
... \
|
| 336 |
-
manifest_file=example.json \
|
| 337 |
... \
|
| 338 |
```
|
| 339 |
|
|
|
|
| 264 |
from nemo.collections.asr.models import SortformerEncLabelModel, ASRModel
|
| 265 |
import torch
|
| 266 |
# A speaker diarization model is needed for tracking the speech activity of each speaker.
|
| 267 |
+
diar_model = SortformerEncLabelModel.from_pretrained("nvidia/diar_streaming_sortformer_4spk-v2.1").eval().to(torch.device("cuda"))
|
| 268 |
+
asr_model = ASRModel.from_pretrained("nvidia/multitalker-parakeet-streaming-0.6b-v1.nemo").eval().to(torch.device("cuda"))
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
# Use the pre-defined dataclass template `MultitalkerTranscriptionConfig` from `multitalker_transcript_config.py`.
|
| 271 |
# Configure the diarization model using streaming parameters:
|
|
|
|
| 311 |
)
|
| 312 |
|
| 313 |
# Generate the speaker-tagged transcript and print it.
|
| 314 |
+
multispk_asr_streamer.generate_seglst_dicts_from_parallel_streaming(samples=samples)
|
| 315 |
+
print(multispk_asr_streamer.instance_manager.seglst_dict_list)
|
| 316 |
```
|
| 317 |
|
| 318 |
### Method 2. Use NeMo example file in NVIDIA/NeMo
|
| 319 |
|
| 320 |
+
Use [the multitalker streaming ASR example script file](https://github.com/NVIDIA-NeMo/NeMo/blob/main/examples/asr/asr_cache_aware_streaming/speech_to_text_multitalker_streaming_infer.py) in [NVIDIA NeMo Framework](https://github.com/NVIDIA-NeMo/NeMo) to launch. With this method, download the `.nemo` model files and specify that in the script:
|
| 321 |
+
```bash
|
| 322 |
python ${NEMO_ROOT}/examples/asr/asr_cache_aware_streaming/speech_to_text_multitalker_streaming_infer.py \
|
| 323 |
+
asr_model="/path/to/your/multitalker-parakeet-streaming-0.6b-v1.nemo" \
|
| 324 |
+
diar_model="/path/to/your/nvidia/diar_streaming_sortformer_4spk-v2.nemo" \
|
| 325 |
+
att_context_size="[70,13]" \
|
| 326 |
+
generate_realtime_scripts=False \
|
| 327 |
+
audio_file="/path/to/example.wav" \
|
| 328 |
+
output_path="/path/to/example_output.json"
|
| 329 |
```
|
| 330 |
|
| 331 |
Or the `audio_file` argument can be replaced with the `manifest_file` to handle multiple files in batch mode:
|
| 332 |
+
```bash
|
| 333 |
python ${NEMO_ROOT}/examples/asr/asr_cache_aware_streaming/speech_to_text_multitalker_streaming_infer.py \
|
| 334 |
... \
|
| 335 |
+
manifest_file="example.json" \
|
| 336 |
... \
|
| 337 |
```
|
| 338 |
|