raphaelmerx committed on
Commit
9fdfc76
·
1 Parent(s): f6a94c1

Use Tetun voice

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +1 -1
  3. app.py +18 -15
  4. piper/voices.json +0 -0
  5. requirements.txt +2 -4
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Piper
3
  emoji: 🐢
4
  colorFrom: blue
5
  colorTo: purple
 
1
  ---
2
+ title: Piper Tetun
3
  emoji: 🐢
4
  colorFrom: blue
5
  colorTo: purple
app.py CHANGED
@@ -3,25 +3,23 @@ import wave
3
  import numpy as np
4
  from io import BytesIO
5
  from huggingface_hub import hf_hub_download
6
- from piper import PiperVoice
7
- from transformers import pipeline
8
 
9
- # Load the NSFW classifier model
10
- nsfw_detector = pipeline("text-classification", model="michellejieli/NSFW_text_classifier")
11
 
12
  def synthesize_speech(text):
13
- # Check for NSFW content
14
- nsfw_result = nsfw_detector(text)
15
- if nsfw_result[0]['label'] == 'NSFW':
16
- return "NSFW content detected. Cannot process.", None
17
-
18
- model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
19
- config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
 
20
  voice = PiperVoice.load(model_path, config_path)
21
 
22
  # Create an in-memory buffer for the WAV file
23
  buffer = BytesIO()
24
- with wave.open(buffer, 'wb') as wav_file:
25
  wav_file.setframerate(voice.config.sample_rate)
26
  wav_file.setsampwidth(2) # 16-bit
27
  wav_file.setnchannels(1) # mono
@@ -35,16 +33,21 @@ def synthesize_speech(text):
35
 
36
  return audio_data.tobytes(), None
37
 
 
38
  # Using Gradio Blocks
39
  with gr.Blocks(theme=gr.themes.Base()) as blocks:
40
  gr.Markdown("# Text to Speech Synthesizer")
41
  gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
42
  input_text = gr.Textbox(label="Input Text")
43
  output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
44
- output_text = gr.Textbox(label="Output Text", visible=False) # This is the new text output component
 
 
45
  submit_button = gr.Button("Synthesize")
46
 
47
- submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
 
 
48
 
49
  # Run the app
50
- blocks.launch()
 
3
  import numpy as np
4
  from io import BytesIO
5
  from huggingface_hub import hf_hub_download
6
+ from piper import PiperVoice
 
7
 
 
 
8
 
9
  def synthesize_speech(text):
10
+ model_path = hf_hub_download(
11
+ repo_id="raphaelmerx/piper-voices",
12
+ filename="tdt/tdt_TL/joao/medium/tdt_TL-joao-medium.onnx",
13
+ )
14
+ config_path = hf_hub_download(
15
+ repo_id="raphaelmerx/piper-voices",
16
+ filename="tdt/tdt_TL/joao/medium/tdt_TL-joao-medium.onnx.json",
17
+ )
18
  voice = PiperVoice.load(model_path, config_path)
19
 
20
  # Create an in-memory buffer for the WAV file
21
  buffer = BytesIO()
22
+ with wave.open(buffer, "wb") as wav_file:
23
  wav_file.setframerate(voice.config.sample_rate)
24
  wav_file.setsampwidth(2) # 16-bit
25
  wav_file.setnchannels(1) # mono
 
33
 
34
  return audio_data.tobytes(), None
35
 
36
+
37
  # Using Gradio Blocks
38
  with gr.Blocks(theme=gr.themes.Base()) as blocks:
39
  gr.Markdown("# Text to Speech Synthesizer")
40
  gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
41
  input_text = gr.Textbox(label="Input Text")
42
  output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
43
+ output_text = gr.Textbox(
44
+ label="Output Text", visible=False
45
+ ) # This is the new text output component
46
  submit_button = gr.Button("Synthesize")
47
 
48
+ submit_button.click(
49
+ synthesize_speech, inputs=input_text, outputs=[output_audio, output_text]
50
+ )
51
 
52
  # Run the app
53
+ blocks.launch()
piper/voices.json CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,5 +1,3 @@
1
- torch
2
- transformers
3
  piper-tts
4
- piper-phonemize~=1.1.0
5
- onnxruntime>=1.11.0,<2
 
 
 
1
  piper-tts
2
+ piper-phonemize
3
+ onnxruntime>=1.11.0,<2