Upload 10 files
Browse filesUpdated some features
- README.md +3 -16
- WarBot.py +132 -0
- WarBot_test.ipynb +129 -24
- WarClient.py +14 -0
- WarServer.py +25 -0
- latest_silero_models.yml +563 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -1,16 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
- kertser/WarOnline
|
5 |
-
language:
|
6 |
-
- ru
|
7 |
-
tags:
|
8 |
-
- chatbot
|
9 |
-
- WarOnline
|
10 |
-
- NLP
|
11 |
-
- GPT2
|
12 |
-
---
|
13 |
-
This is a GPT-style model, based on "Kirili4ik/ruDialoGpt3-medium-finetuned-telegram" model and fine-tuned on the WarOnline Dataset.<br>
|
14 |
-
https://huggingface.co/Kirili4ik/ruDialoGpt3-medium-finetuned-telegram<br>
|
15 |
-
The model operates as a chat-bot by means of "Quote" -> "Response"<br>
|
16 |
-
The model was fine-tuned locally on a single GPU.
|
|
|
1 |
+
# WarOnline_Bot
|
2 |
+
This is a GPT-based chatbot trained on WarOnline conversations.<br>
|
3 |
+
It is not ready yet
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
WarBot.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoTokenizer ,AutoModelForCausalLM
|
2 |
+
import re
|
3 |
+
# Speller and punctuation:
|
4 |
+
import os
|
5 |
+
import yaml
|
6 |
+
import torch
|
7 |
+
from torch import package
|
8 |
+
# not very necessary
|
9 |
+
import textwrap
|
10 |
+
from textwrap3 import wrap
|
11 |
+
|
12 |
+
# util function to get expected len after tokenizing
|
13 |
+
def get_length_param(text: str, tokenizer) -> str:
    """Return the length-bucket code for *text* based on its token count.

    Args:
        text: Text to measure.
        tokenizer: Object whose ``encode(text)`` returns a sized sequence.

    Returns:
        ``'1'`` for at most 15 tokens, ``'2'`` for at most 50, ``'3'`` for at
        most 256, and ``'-'`` for anything longer.
    """
    n_tokens = len(tokenizer.encode(text))
    # Limits are listed in ascending order; the first one that fits wins.
    for upper_bound, bucket in ((15, '1'), (50, '2'), (256, '3')):
        if n_tokens <= upper_bound:
            return bucket
    return '-'
|
24 |
+
|
25 |
+
def remove_duplicates(S):
    """Strip Latin letters from *S* and drop repeated words.

    Words are the whitespace-separated tokens left after every run of ASCII
    letters has been removed. Only the first occurrence of each word is kept,
    in the original order.

    A word counts as a duplicate only when the exact same word was already
    seen. Testing against the accumulated text with a substring check would
    also discard short words (prepositions, single letters) that merely occur
    inside an earlier, longer word.

    Args:
        S: Input text.

    Returns:
        The de-duplicated words joined by single spaces ("" when none remain).
    """
    S = re.sub(r'[a-zA-Z]+', '', S)  # Remove English (Latin-letter) runs
    seen = set()
    unique_words = []
    for word in S.split():
        if word not in seen:
            seen.add(word)
            unique_words.append(word)
    return " ".join(unique_words)
|
33 |
+
|
34 |
+
def removeSigns(S):
    """Cut *S* off right after its last '.' or '!'.

    Anything following the final sentence-ending mark is discarded. A string
    containing neither mark is returned unchanged.
    """
    cut = -1
    for mark in (".", "!"):
        cut = max(cut, S.rfind(mark))
    return S if cut < 0 else S[:cut + 1]
|
39 |
+
|
40 |
+
def prepare_punct():
    """Download (when missing) and load the Silero text-enhancement model.

    The Silero model index is fetched into ``latest_silero_models.yml`` on
    every call. The package named by its ``te_models -> latest -> package``
    entry is downloaded into ``downloaded_model/`` only when no file with that
    name is already present.

    Returns:
        The object stored as ``te_model/model`` inside the downloaded
        ``torch.package`` archive.
    """
    torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',
                                   'latest_silero_models.yml',
                                   progress=False)

    # The index contains non-ASCII (Cyrillic) text, so decode it as UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open('latest_silero_models.yml', 'r', encoding='utf-8') as yaml_file:
        models = yaml.load(yaml_file, Loader=yaml.SafeLoader)
    model_conf = models.get('te_models').get('latest')

    # Prepare punctuation fix
    model_url = model_conf.get('package')

    model_dir = "downloaded_model"
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, os.path.basename(model_url))

    # Reuse a previously downloaded package instead of fetching it again.
    if not os.path.isfile(model_path):
        torch.hub.download_url_to_file(model_url,
                                       model_path,
                                       progress=True)

    # NOTE(review): load_pickle unpickles content of a downloaded archive;
    # this is only as trustworthy as the URL taken from the index above.
    imp = package.PackageImporter(model_path)
    model_punct = imp.load_pickle("te_model", "model")

    return model_punct
|
65 |
+
|
66 |
+
def initialize():
    """Load the chat model, its tokenizer and the punctuation model.

    Returns:
        Tuple ``(model, tokenizer, model_punct)``: the causal LM and the
        tokenizer loaded from the "WarBot" checkpoint, plus the result of
        ``prepare_punct()``.
    """
    checkpoint = "WarBot"
    # Same loading order as before: tokenizer, then LM, then punctuation model.
    tok = AutoTokenizer.from_pretrained(checkpoint)
    lm = AutoModelForCausalLM.from_pretrained(checkpoint)
    punct_model = prepare_punct()
    return (lm, tok, punct_model)
|
73 |
+
|
74 |
+
def split_string(string, n=256):
    """Split *string* into consecutive chunks of at most *n* characters.

    The last chunk may be shorter; an empty string yields an empty list.
    """
    chunks = []
    for start in range(0, len(string), n):
        chunks.append(string[start:start + n])
    return chunks
|
76 |
+
|
77 |
+
def get_response(quote:str,model,tokenizer,model_punct):
    """Generate a cleaned-up, punctuated reply to *quote*.

    Args:
        quote: The message to reply to.
        model: Language model exposing ``generate(...)``.
        tokenizer: Tokenizer exposing ``encode``, ``decode``, ``eos_token``,
            ``eos_token_id`` and ``pad_token_id``.
        model_punct: Object exposing ``enhance_text(text, lan=...)``.

    Returns:
        The generated reply after filtering and punctuation enhancement.
    """
    # Prompt format: "|0|<length bucket>|<quote><eos>", returned as a PyTorch tensor.
    user_input_ids = tokenizer.encode(
        f"|0|{get_length_param(quote, tokenizer)}|" + quote + tokenizer.eos_token,
        return_tensors="pt")

    chat_history_ids = user_input_ids  # To be changed

    # Short quotes get a looser repetition constraint than long ones.
    tokens_count = len(tokenizer.encode(quote))
    no_repeat_ngram_size = 2 if tokens_count < 15 else 1

    output_id = model.generate(
        chat_history_ids,
        num_return_sequences=1,  # use for more variants, but have to print [i]
        max_length=200,
        no_repeat_ngram_size=no_repeat_ngram_size,
        do_sample=True,
        top_k=50,
        top_p=0.9,
        temperature=0.4,  # was 0.6
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    response = tokenizer.decode(output_id[0], skip_special_tokens=True)
    response = removeSigns(response)
    if quote:
        # Keep only what follows the echoed quote. The guard is needed because
        # str.split raises ValueError on an empty separator.
        response = response.split(quote)[-1]
    # Whitelist filter. The Latin range is "A-Z"; "A-z" would also let
    # the characters [ \ ] ^ _ ` through.
    response = re.sub(r'[^0-9А-Яа-яЁёa-zA-Z;., !()/\-+:?]', '', response)
    # Drop runs of four or more digits, then repeated words.
    response = remove_duplicates(re.sub(r"\d{4,}", "", response))
    response = re.sub(r'\.\.+', '', response)  # Remove the "....." thing

    if len(response) > 200:
        # Enhance long replies piecewise. wrap() breaks at whitespace and drops
        # it, so the pieces are re-joined with a space to keep words apart.
        pieces = wrap(response, 200)
        response = ' '.join(model_punct.enhance_text(piece, lan='ru') for piece in pieces)
    else:
        response = model_punct.enhance_text(response, lan='ru')

    # Remove the literal "[UNK]" marker. A regex character class [UNK] would
    # instead delete every single U, N and K character.
    response = response.replace('[UNK]', '')
    return response
|
125 |
+
|
126 |
+
#if __name__ == '__main__':
|
127 |
+
#model,tokenizer,model_punct = initialize()
|
128 |
+
#quote = "Это хорошо, но глядя на ролик, когда ефиопские толпы в Израиле громят машины и нападают на улице на израильтян - задумаешься, куда все движется"
|
129 |
+
#print('please wait...')
|
130 |
+
#response = wrap(get_response(quote,model,tokenizer,model_punct),60)
|
131 |
+
#for phrase in response:
|
132 |
+
# print(phrase)
|
WarBot_test.ipynb
CHANGED
@@ -9,11 +9,73 @@
|
|
9 |
"outputs": [],
|
10 |
"source": [
|
11 |
"from transformers import AutoTokenizer ,AutoModelForCausalLM\n",
|
12 |
-
"import
|
13 |
-
"import re\n",
|
14 |
-
"from sklearn.utils import shuffle"
|
15 |
]
|
16 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
{
|
18 |
"cell_type": "code",
|
19 |
"execution_count": 2,
|
@@ -38,7 +100,7 @@
|
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
41 |
-
"execution_count":
|
42 |
"outputs": [],
|
43 |
"source": [
|
44 |
"def remove_duplicates(S):\n",
|
@@ -56,7 +118,7 @@
|
|
56 |
},
|
57 |
{
|
58 |
"cell_type": "code",
|
59 |
-
"execution_count":
|
60 |
"outputs": [],
|
61 |
"source": [
|
62 |
"fit_checkpoint = \"WarBot\"\n",
|
@@ -69,10 +131,11 @@
|
|
69 |
},
|
70 |
{
|
71 |
"cell_type": "code",
|
72 |
-
"execution_count":
|
73 |
"outputs": [],
|
74 |
"source": [
|
75 |
-
"quote = \"
|
|
|
76 |
],
|
77 |
"metadata": {
|
78 |
"collapsed": false
|
@@ -80,26 +143,38 @@
|
|
80 |
},
|
81 |
{
|
82 |
"cell_type": "code",
|
83 |
-
"execution_count":
|
84 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
"source": [
|
86 |
"# encode the input, add the eos_token and return a tensor in Pytorch\n",
|
87 |
"user_inpit_ids = tokenizer.encode(f\"|0|{get_length_param(quote, tokenizer)}|\" \\\n",
|
88 |
" + quote + tokenizer.eos_token, return_tensors=\"pt\")\n",
|
89 |
"\n",
|
90 |
-
"#chat_history_ids = torch.cat([chat_history_ids, user_inpit_ids], dim=-1)\n",
|
91 |
-
"\n",
|
92 |
"chat_history_ids = user_inpit_ids # To be changed\n",
|
93 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
"output_id = model.generate(\n",
|
95 |
" chat_history_ids,\n",
|
96 |
-
" num_return_sequences=
|
97 |
" max_length=300, #512\n",
|
98 |
-
" no_repeat_ngram_size=
|
99 |
" do_sample=True, #True\n",
|
100 |
" top_k=50,#50\n",
|
101 |
" top_p=0.9, #0.9\n",
|
102 |
-
" temperature = 0.
|
103 |
" #mask_token_id=tokenizer.mask_token_id,\n",
|
104 |
" eos_token_id=tokenizer.eos_token_id,\n",
|
105 |
" #unk_token_id=tokenizer.unk_token_id,\n",
|
@@ -114,7 +189,7 @@
|
|
114 |
},
|
115 |
{
|
116 |
"cell_type": "code",
|
117 |
-
"execution_count":
|
118 |
"outputs": [],
|
119 |
"source": [
|
120 |
"def removeSigns(S):\n",
|
@@ -135,10 +210,10 @@
|
|
135 |
"def getResponce():\n",
|
136 |
" response = tokenizer.decode(output_id[0], skip_special_tokens=True)\n",
|
137 |
" response = removeSigns(response)\n",
|
138 |
-
"
|
139 |
-
"
|
140 |
-
"
|
141 |
-
" return
|
142 |
],
|
143 |
"metadata": {
|
144 |
"collapsed": false
|
@@ -146,13 +221,13 @@
|
|
146 |
},
|
147 |
{
|
148 |
"cell_type": "code",
|
149 |
-
"execution_count":
|
150 |
"outputs": [
|
151 |
{
|
152 |
"name": "stdout",
|
153 |
"output_type": "stream",
|
154 |
"text": [
|
155 |
-
"Response:
|
156 |
]
|
157 |
}
|
158 |
],
|
@@ -163,6 +238,36 @@
|
|
163 |
"collapsed": false
|
164 |
}
|
165 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
{
|
167 |
"cell_type": "markdown",
|
168 |
"source": [
|
@@ -174,7 +279,7 @@
|
|
174 |
},
|
175 |
{
|
176 |
"cell_type": "code",
|
177 |
-
"execution_count":
|
178 |
"outputs": [],
|
179 |
"source": [
|
180 |
"from autocorrect import Speller\n",
|
@@ -187,13 +292,13 @@
|
|
187 |
},
|
188 |
{
|
189 |
"cell_type": "code",
|
190 |
-
"execution_count":
|
191 |
"outputs": [
|
192 |
{
|
193 |
"name": "stdout",
|
194 |
"output_type": "stream",
|
195 |
"text": [
|
196 |
-
"
|
197 |
]
|
198 |
}
|
199 |
],
|
|
|
9 |
"outputs": [],
|
10 |
"source": [
|
11 |
"from transformers import AutoTokenizer ,AutoModelForCausalLM\n",
|
12 |
+
"import re"
|
|
|
|
|
13 |
]
|
14 |
},
|
15 |
+
{
|
16 |
+
"cell_type": "code",
|
17 |
+
"execution_count": 56,
|
18 |
+
"outputs": [],
|
19 |
+
"source": [
|
20 |
+
"# Speller and punctuation\n",
|
21 |
+
"\n",
|
22 |
+
"import os\n",
|
23 |
+
"import yaml\n",
|
24 |
+
"import torch\n",
|
25 |
+
"from torch import package\n",
|
26 |
+
"\n",
|
27 |
+
"torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',\n",
|
28 |
+
" 'latest_silero_models.yml',\n",
|
29 |
+
" progress=False)\n",
|
30 |
+
"\n",
|
31 |
+
"with open('latest_silero_models.yml', 'r') as yaml_file:\n",
|
32 |
+
" models = yaml.load(yaml_file, Loader=yaml.SafeLoader)\n",
|
33 |
+
"model_conf = models.get('te_models').get('latest')"
|
34 |
+
],
|
35 |
+
"metadata": {
|
36 |
+
"collapsed": false
|
37 |
+
}
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 57,
|
42 |
+
"outputs": [
|
43 |
+
{
|
44 |
+
"data": {
|
45 |
+
"text/plain": " 0%| | 0.00/87.5M [00:00<?, ?B/s]",
|
46 |
+
"application/vnd.jupyter.widget-view+json": {
|
47 |
+
"version_major": 2,
|
48 |
+
"version_minor": 0,
|
49 |
+
"model_id": "2c9c9ff9721046ad89665fbf4f6dd275"
|
50 |
+
}
|
51 |
+
},
|
52 |
+
"metadata": {},
|
53 |
+
"output_type": "display_data"
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"source": [
|
57 |
+
"# Prepare punctuation fix for test\n",
|
58 |
+
"model_url = model_conf.get('package')\n",
|
59 |
+
"\n",
|
60 |
+
"model_dir = \"downloaded_model\"\n",
|
61 |
+
"os.makedirs(model_dir, exist_ok=True)\n",
|
62 |
+
"model_path = os.path.join(model_dir, os.path.basename(model_url))\n",
|
63 |
+
"\n",
|
64 |
+
"if not os.path.isfile(model_path):\n",
|
65 |
+
" torch.hub.download_url_to_file(model_url,\n",
|
66 |
+
" model_path,\n",
|
67 |
+
" progress=True)\n",
|
68 |
+
"\n",
|
69 |
+
"imp = package.PackageImporter(model_path)\n",
|
70 |
+
"model = imp.load_pickle(\"te_model\", \"model\")\n",
|
71 |
+
"\n",
|
72 |
+
"def apply_te(text, lan='ru'):\n",
|
73 |
+
" return model.enhance_text(text, lan)"
|
74 |
+
],
|
75 |
+
"metadata": {
|
76 |
+
"collapsed": false
|
77 |
+
}
|
78 |
+
},
|
79 |
{
|
80 |
"cell_type": "code",
|
81 |
"execution_count": 2,
|
|
|
100 |
},
|
101 |
{
|
102 |
"cell_type": "code",
|
103 |
+
"execution_count": 3,
|
104 |
"outputs": [],
|
105 |
"source": [
|
106 |
"def remove_duplicates(S):\n",
|
|
|
118 |
},
|
119 |
{
|
120 |
"cell_type": "code",
|
121 |
+
"execution_count": 4,
|
122 |
"outputs": [],
|
123 |
"source": [
|
124 |
"fit_checkpoint = \"WarBot\"\n",
|
|
|
131 |
},
|
132 |
{
|
133 |
"cell_type": "code",
|
134 |
+
"execution_count": 36,
|
135 |
"outputs": [],
|
136 |
"source": [
|
137 |
+
"quote = \"Это хорошо, но глядя на ролик, когда ефиопские толпы в Израиле громят машины и нападают на улице на израильтян - задумаешься, куда все движется.\\\n",
|
138 |
+
"Особенно запомнилась картина, когда на проезжающий авто набрасывается штук десять негров и бьют его камнями, запрыгивают на капот и крышу, пытаются через лобовик достать парня-водителя. Жутковато. И им это сошло с рук.\""
|
139 |
],
|
140 |
"metadata": {
|
141 |
"collapsed": false
|
|
|
143 |
},
|
144 |
{
|
145 |
"cell_type": "code",
|
146 |
+
"execution_count": 37,
|
147 |
+
"outputs": [
|
148 |
+
{
|
149 |
+
"name": "stdout",
|
150 |
+
"output_type": "stream",
|
151 |
+
"text": [
|
152 |
+
"90\n"
|
153 |
+
]
|
154 |
+
}
|
155 |
+
],
|
156 |
"source": [
|
157 |
"# encode the input, add the eos_token and return a tensor in Pytorch\n",
|
158 |
"user_inpit_ids = tokenizer.encode(f\"|0|{get_length_param(quote, tokenizer)}|\" \\\n",
|
159 |
" + quote + tokenizer.eos_token, return_tensors=\"pt\")\n",
|
160 |
"\n",
|
|
|
|
|
161 |
"chat_history_ids = user_inpit_ids # To be changed\n",
|
162 |
"\n",
|
163 |
+
"tokens_count = len(tokenizer.encode(quote))\n",
|
164 |
+
"if tokens_count < 15:\n",
|
165 |
+
" no_repeat_ngram_size = 2\n",
|
166 |
+
"else:\n",
|
167 |
+
" no_repeat_ngram_size = 1\n",
|
168 |
+
"\n",
|
169 |
"output_id = model.generate(\n",
|
170 |
" chat_history_ids,\n",
|
171 |
+
" num_return_sequences=2, # use for more variants, but have to print [i]\n",
|
172 |
" max_length=300, #512\n",
|
173 |
+
" no_repeat_ngram_size=no_repeat_ngram_size, #3\n",
|
174 |
" do_sample=True, #True\n",
|
175 |
" top_k=50,#50\n",
|
176 |
" top_p=0.9, #0.9\n",
|
177 |
+
" temperature = 0.4, # was 0.6, 0 for greedy\n",
|
178 |
" #mask_token_id=tokenizer.mask_token_id,\n",
|
179 |
" eos_token_id=tokenizer.eos_token_id,\n",
|
180 |
" #unk_token_id=tokenizer.unk_token_id,\n",
|
|
|
189 |
},
|
190 |
{
|
191 |
"cell_type": "code",
|
192 |
+
"execution_count": 8,
|
193 |
"outputs": [],
|
194 |
"source": [
|
195 |
"def removeSigns(S):\n",
|
|
|
210 |
"def getResponce():\n",
|
211 |
" response = tokenizer.decode(output_id[0], skip_special_tokens=True)\n",
|
212 |
" response = removeSigns(response)\n",
|
213 |
+
" response = response.split(quote)[-1] #Remove the Quote\n",
|
214 |
+
" response = re.sub(r'[^0-9А-Яа-яЁёa-zA-z;., !()-+:?]', '', response) # Clear the response, remains only alpha-numerical values\n",
|
215 |
+
" response = remove_duplicates(re.sub(r\"\\d{4,}\", \"\", response)) # Remove the consequent numbers with 4 or more digits\n",
|
216 |
+
" return response"
|
217 |
],
|
218 |
"metadata": {
|
219 |
"collapsed": false
|
|
|
221 |
},
|
222 |
{
|
223 |
"cell_type": "code",
|
224 |
+
"execution_count": 64,
|
225 |
"outputs": [
|
226 |
{
|
227 |
"name": "stdout",
|
228 |
"output_type": "stream",
|
229 |
"text": [
|
230 |
+
"Response: я не знаю как там было у вас...но вот вам видео из ливана: эти граждане (в плохом смысле слова) просто тупо ломанулись внутрь! сожалению они были безоружны еще вооружены до зубов....и их всех убили..так что лучше сразу стреляли мне хотябы пытались зайти комне под видом друзей..... готов стрелять хоть сейчас....а потом думал может быть таки стоит подумать где мой пистолет дома если вдруг придется применить...как так..подумайте сами господа...... зызыы для тех кто тут оружие, вы поняли меня правильно! спасибо большое. буду знать теперь обязательно кого нибудь пристрелят случае ))) псы!!! вообщем говоря ситуация такая 1 группа была сама хамасниками поэтому должныватся 2 другая часть которые хотели жить вместе(ну скажем каждый себе злобный буратино).\n"
|
231 |
]
|
232 |
}
|
233 |
],
|
|
|
238 |
"collapsed": false
|
239 |
}
|
240 |
},
|
241 |
+
{
|
242 |
+
"cell_type": "markdown",
|
243 |
+
"source": [
|
244 |
+
"Punctuation:"
|
245 |
+
],
|
246 |
+
"metadata": {
|
247 |
+
"collapsed": false
|
248 |
+
}
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"cell_type": "code",
|
252 |
+
"execution_count": 65,
|
253 |
+
"outputs": [
|
254 |
+
{
|
255 |
+
"name": "stdout",
|
256 |
+
"output_type": "stream",
|
257 |
+
"text": [
|
258 |
+
"Я не знаю, как там было у вас...но вот вам видео из [UNK] Эти граждане (в плохом смысле слова) просто тупо ломанулись внутрь!. Сожалению, они были безоружны еще вооружены до зубов....и их всех убили..так, что лучше сразу стреляли мне, хотябы пытались зайти комне под видом друзей..... готов стрелять хоть сейчас....а Потом думал может быть таки стоит подумать, где мой пистолет дома, если вдруг придется применить...как так..подумайте сами господа...... зызыы для тех, кто тут оружие, вы поняли меня правильно.! спасибо большое. буду знать теперь обязательно кого-нибудь пристрелят случае ))) псы!!! вообщем говоря ситуация. Такая 1. Группа была сама хамасниками, поэтому должныватся 2. Другая часть, которые хотели жить вместе(ну скажем каждый себе злобный буратино)..\n"
|
259 |
+
]
|
260 |
+
}
|
261 |
+
],
|
262 |
+
"source": [
|
263 |
+
"input_text = getResponce()\n",
|
264 |
+
"output_text = apply_te(input_text, lan='ru')\n",
|
265 |
+
"print(output_text)"
|
266 |
+
],
|
267 |
+
"metadata": {
|
268 |
+
"collapsed": false
|
269 |
+
}
|
270 |
+
},
|
271 |
{
|
272 |
"cell_type": "markdown",
|
273 |
"source": [
|
|
|
279 |
},
|
280 |
{
|
281 |
"cell_type": "code",
|
282 |
+
"execution_count": 41,
|
283 |
"outputs": [],
|
284 |
"source": [
|
285 |
"from autocorrect import Speller\n",
|
|
|
292 |
},
|
293 |
{
|
294 |
"cell_type": "code",
|
295 |
+
"execution_count": 42,
|
296 |
"outputs": [
|
297 |
{
|
298 |
"name": "stdout",
|
299 |
"output_type": "stream",
|
300 |
"text": [
|
301 |
+
"я не знаю как там было у вас...но вот вам видео из лимана эти граждане плохом смысле слова просто тупо ломанулись внутрь! сожалению они были безоружны еще вооружены до зубов....и их всех убили..так что лучше сразу стре��яли мне хотябы пытались зайти камне под видом друзей..... готов стрелять хоть сейчас....а потом думал может быть таки стоит подумать где мой пистолет дома если вдруг придется применить...как так..подумайте сами господа...... кызы для тех кто тут оружие, вы поняли меня правильно! спасибо большое. буду знать теперь обязательно кого нибудь пристрелят случае псы!!! вообщем говоря ситуация такая 1 группа была сама хамасниками поэтому должныватся 2 другая часть которые хотели жить вместе скажем каждый себе злобный буратино.\n"
|
302 |
]
|
303 |
}
|
304 |
],
|
WarClient.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import socket
|
2 |
+
|
3 |
+
# Address of the WarBot server to query.
HOST = 'localhost'
PORT = 5000

message = "Это хорошо, но глядя на ролик, когда ефиопские толпы в Израиле громят машины и нападают на улице на израильтян - задумаешься, куда все движется"

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
    client_socket.connect((HOST, PORT))
    client_socket.sendall(message.encode())
    print('Wait...')
    # Read until the peer closes the connection. A single fixed-size recv()
    # can truncate a long reply, possibly in the middle of a multi-byte UTF-8
    # character, which would make decode() fail. This relies on the server
    # closing the connection after sending its reply.
    data = b''.join(iter(lambda: client_socket.recv(4096), b''))
    received_string = data.decode('utf-8')
    print(f'Received string from server: {received_string}')
|
WarServer.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import socket
|
2 |
+
import WarBot
|
3 |
+
|
4 |
+
# Load the models once at start-up; they are reused for every connection.
model,tokenizer,model_punct = WarBot.initialize()

HOST = 'localhost'
PORT = 5000
# Upper bound on the bytes read for one request (a single recv call).
_MAX_REQUEST_BYTES = 65536

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
    server_socket.bind((HOST, PORT))
    server_socket.listen()
    print(f'Server is listening on port {PORT}')
    while True:
        conn, addr = server_socket.accept()
        # The with-block closes the connection, so no explicit close() is needed.
        with conn:
            print(f'Connected by {addr}')
            # NOTE(review): the protocol has no framing, so one recv() is
            # treated as the whole request; a very large message could still
            # arrive split across several reads.
            data = conn.recv(_MAX_REQUEST_BYTES)
            if not data:
                # Peer connected and closed without sending anything.
                continue
            # Ignore undecodable bytes (e.g. a multi-byte character cut at the
            # buffer boundary) rather than letting decode() stop the server.
            received_string = data.decode('utf-8', errors='ignore')
            print(f'Received string from client: {received_string}')

            response = WarBot.get_response(received_string, model, tokenizer, model_punct)

            conn.sendall(response.encode())
|
latest_silero_models.yml
ADDED
@@ -0,0 +1,563 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pre-trained STT models
|
2 |
+
stt_models:
|
3 |
+
en:
|
4 |
+
latest:
|
5 |
+
meta:
|
6 |
+
name: "en_v6"
|
7 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
8 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
9 |
+
jit: "https://models.silero.ai/models/en/en_v6.jit"
|
10 |
+
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
|
11 |
+
jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
|
12 |
+
jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
|
13 |
+
onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
|
14 |
+
v6:
|
15 |
+
meta:
|
16 |
+
name: "en_v6"
|
17 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
18 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
19 |
+
jit: "https://models.silero.ai/models/en/en_v6.jit"
|
20 |
+
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
|
21 |
+
jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
|
22 |
+
jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
|
23 |
+
onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
|
24 |
+
v5:
|
25 |
+
meta:
|
26 |
+
name: "en_v5"
|
27 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
28 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
29 |
+
jit: "https://models.silero.ai/models/en/en_v5.jit"
|
30 |
+
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
|
31 |
+
onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
|
32 |
+
jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
|
33 |
+
jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
|
34 |
+
onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
|
35 |
+
v4_0:
|
36 |
+
meta:
|
37 |
+
name: "en_v4_0"
|
38 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
39 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
40 |
+
jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
|
41 |
+
onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
|
42 |
+
v3:
|
43 |
+
meta:
|
44 |
+
name: "en_v3"
|
45 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
46 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
47 |
+
jit: "https://models.silero.ai/models/en/en_v3_jit.model"
|
48 |
+
onnx: "https://models.silero.ai/models/en/en_v3.onnx"
|
49 |
+
jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
|
50 |
+
jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
|
51 |
+
jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
|
52 |
+
onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
|
53 |
+
jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
|
54 |
+
jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
|
55 |
+
onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
|
56 |
+
v2:
|
57 |
+
meta:
|
58 |
+
name: "en_v2"
|
59 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
60 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
61 |
+
jit: "https://models.silero.ai/models/en/en_v2_jit.model"
|
62 |
+
onnx: "https://models.silero.ai/models/en/en_v2.onnx"
|
63 |
+
tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
|
64 |
+
v1:
|
65 |
+
meta:
|
66 |
+
name: "en_v1"
|
67 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
68 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
69 |
+
jit: "https://models.silero.ai/models/en/en_v1_jit.model"
|
70 |
+
onnx: "https://models.silero.ai/models/en/en_v1.onnx"
|
71 |
+
tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
|
72 |
+
de:
|
73 |
+
latest:
|
74 |
+
meta:
|
75 |
+
name: "de_v1"
|
76 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
77 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
78 |
+
jit: "https://models.silero.ai/models/de/de_v1_jit.model"
|
79 |
+
onnx: "https://models.silero.ai/models/de/de_v1.onnx"
|
80 |
+
tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
|
81 |
+
v1:
|
82 |
+
meta:
|
83 |
+
name: "de_v1"
|
84 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
85 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
86 |
+
jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
|
87 |
+
onnx: "https://models.silero.ai/models/de/de_v1.onnx"
|
88 |
+
tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
|
89 |
+
v3:
|
90 |
+
meta:
|
91 |
+
name: "de_v3"
|
92 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
93 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
94 |
+
jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
|
95 |
+
v4:
|
96 |
+
meta:
|
97 |
+
name: "de_v4"
|
98 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
99 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
100 |
+
jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
|
101 |
+
onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
|
102 |
+
es:
|
103 |
+
latest:
|
104 |
+
meta:
|
105 |
+
name: "es_v1"
|
106 |
+
sample: "https://models.silero.ai/examples/es_sample.wav"
|
107 |
+
labels: "https://models.silero.ai/models/es/es_v1_labels.json"
|
108 |
+
jit: "https://models.silero.ai/models/es/es_v1_jit.model"
|
109 |
+
onnx: "https://models.silero.ai/models/es/es_v1.onnx"
|
110 |
+
tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
|
111 |
+
ua:
|
112 |
+
latest:
|
113 |
+
meta:
|
114 |
+
name: "ua_v3"
|
115 |
+
sample: "https://models.silero.ai/examples/ua_sample.wav"
|
116 |
+
credits:
|
117 |
+
datasets:
|
118 |
+
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
|
119 |
+
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
|
120 |
+
jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
|
121 |
+
jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
|
122 |
+
onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
|
123 |
+
v3:
|
124 |
+
meta:
|
125 |
+
name: "ua_v3"
|
126 |
+
sample: "https://models.silero.ai/examples/ua_sample.wav"
|
127 |
+
credits:
|
128 |
+
datasets:
|
129 |
+
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
|
130 |
+
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
|
131 |
+
jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
|
132 |
+
jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
|
133 |
+
onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
|
134 |
+
v1:
|
135 |
+
meta:
|
136 |
+
name: "ua_v1"
|
137 |
+
sample: "https://models.silero.ai/examples/ua_sample.wav"
|
138 |
+
credits:
|
139 |
+
datasets:
|
140 |
+
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
|
141 |
+
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
|
142 |
+
jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
|
143 |
+
jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
|
144 |
+
tts_models:
|
145 |
+
ru:
|
146 |
+
v3_1_ru:
|
147 |
+
latest:
|
148 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
149 |
+
package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
|
150 |
+
sample_rate: [8000, 24000, 48000]
|
151 |
+
ru_v3:
|
152 |
+
latest:
|
153 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
154 |
+
package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
|
155 |
+
sample_rate: [8000, 24000, 48000]
|
156 |
+
aidar_v2:
|
157 |
+
latest:
|
158 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
159 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
|
160 |
+
sample_rate: [8000, 16000]
|
161 |
+
aidar_8khz:
|
162 |
+
latest:
|
163 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
164 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
165 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
|
166 |
+
sample_rate: 8000
|
167 |
+
v1:
|
168 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
169 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
170 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
|
171 |
+
sample_rate: 8000
|
172 |
+
aidar_16khz:
|
173 |
+
latest:
|
174 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
175 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
176 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
|
177 |
+
sample_rate: 16000
|
178 |
+
v1:
|
179 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
180 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
181 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
|
182 |
+
sample_rate: 16000
|
183 |
+
baya_v2:
|
184 |
+
latest:
|
185 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
186 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
|
187 |
+
sample_rate: [8000, 16000]
|
188 |
+
baya_8khz:
|
189 |
+
latest:
|
190 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
191 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
192 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
|
193 |
+
sample_rate: 8000
|
194 |
+
v1:
|
195 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
196 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
197 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
|
198 |
+
sample_rate: 8000
|
199 |
+
baya_16khz:
|
200 |
+
latest:
|
201 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
202 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
203 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
|
204 |
+
sample_rate: 16000
|
205 |
+
v1:
|
206 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
207 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
208 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
|
209 |
+
sample_rate: 16000
|
210 |
+
irina_v2:
|
211 |
+
latest:
|
212 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
213 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
|
214 |
+
sample_rate: [8000, 16000]
|
215 |
+
irina_8khz:
|
216 |
+
latest:
|
217 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
218 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
219 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
|
220 |
+
sample_rate: 8000
|
221 |
+
v1:
|
222 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
223 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
224 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
|
225 |
+
sample_rate: 8000
|
226 |
+
irina_16khz:
|
227 |
+
latest:
|
228 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
229 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
230 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
|
231 |
+
sample_rate: 16000
|
232 |
+
v1:
|
233 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
234 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
235 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
|
236 |
+
sample_rate: 16000
|
237 |
+
kseniya_v2:
|
238 |
+
latest:
|
239 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
240 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
|
241 |
+
sample_rate: [8000, 16000]
|
242 |
+
kseniya_8khz:
|
243 |
+
latest:
|
244 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
245 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
246 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
|
247 |
+
sample_rate: 8000
|
248 |
+
v1:
|
249 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
250 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
251 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
|
252 |
+
sample_rate: 8000
|
253 |
+
kseniya_16khz:
|
254 |
+
latest:
|
255 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
256 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
257 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
|
258 |
+
sample_rate: 16000
|
259 |
+
v1:
|
260 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
261 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
262 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
|
263 |
+
sample_rate: 16000
|
264 |
+
natasha_v2:
|
265 |
+
latest:
|
266 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
267 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
|
268 |
+
sample_rate: [8000, 16000]
|
269 |
+
natasha_8khz:
|
270 |
+
latest:
|
271 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
272 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
273 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
|
274 |
+
sample_rate: 8000
|
275 |
+
v1:
|
276 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
277 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
278 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
|
279 |
+
sample_rate: 8000
|
280 |
+
natasha_16khz:
|
281 |
+
latest:
|
282 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
283 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
284 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
|
285 |
+
sample_rate: 16000
|
286 |
+
v1:
|
287 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
288 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
289 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
|
290 |
+
sample_rate: 16000
|
291 |
+
ruslan_v2:
|
292 |
+
latest:
|
293 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
294 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
|
295 |
+
sample_rate: [8000, 16000]
|
296 |
+
ruslan_8khz:
|
297 |
+
latest:
|
298 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
299 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
300 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
|
301 |
+
sample_rate: 8000
|
302 |
+
v1:
|
303 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
304 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
305 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
|
306 |
+
sample_rate: 8000
|
307 |
+
ruslan_16khz:
|
308 |
+
latest:
|
309 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
310 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
311 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
|
312 |
+
sample_rate: 16000
|
313 |
+
v1:
|
314 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
315 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
316 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
|
317 |
+
sample_rate: 16000
|
318 |
+
en:
|
319 |
+
v3_en:
|
320 |
+
latest:
|
321 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
322 |
+
package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
|
323 |
+
sample_rate: [8000, 24000, 48000]
|
324 |
+
v3_en_indic:
|
325 |
+
latest:
|
326 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
327 |
+
package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
|
328 |
+
sample_rate: [8000, 24000, 48000]
|
329 |
+
lj_v2:
|
330 |
+
latest:
|
331 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
332 |
+
package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
|
333 |
+
sample_rate: [8000, 16000]
|
334 |
+
lj_8khz:
|
335 |
+
latest:
|
336 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
337 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
338 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
|
339 |
+
sample_rate: 8000
|
340 |
+
v1:
|
341 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
342 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
343 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
|
344 |
+
sample_rate: 8000
|
345 |
+
lj_16khz:
|
346 |
+
latest:
|
347 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
348 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
349 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
|
350 |
+
sample_rate: 16000
|
351 |
+
v1:
|
352 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
353 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
354 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
|
355 |
+
sample_rate: 16000
|
356 |
+
de:
|
357 |
+
v3_de:
|
358 |
+
latest:
|
359 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
360 |
+
package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
|
361 |
+
sample_rate: [8000, 24000, 48000]
|
362 |
+
thorsten_v2:
|
363 |
+
latest:
|
364 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
365 |
+
package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
|
366 |
+
sample_rate: [8000, 16000]
|
367 |
+
thorsten_8khz:
|
368 |
+
latest:
|
369 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
370 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
371 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
|
372 |
+
sample_rate: 8000
|
373 |
+
v1:
|
374 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
375 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
376 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
|
377 |
+
sample_rate: 8000
|
378 |
+
thorsten_16khz:
|
379 |
+
latest:
|
380 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
381 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
382 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
|
383 |
+
sample_rate: 16000
|
384 |
+
v1:
|
385 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
386 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
387 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
|
388 |
+
sample_rate: 16000
|
389 |
+
es:
|
390 |
+
v3_es:
|
391 |
+
latest:
|
392 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
393 |
+
package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
|
394 |
+
sample_rate: [8000, 24000, 48000]
|
395 |
+
tux_v2:
|
396 |
+
latest:
|
397 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
398 |
+
package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
|
399 |
+
sample_rate: [8000, 16000]
|
400 |
+
tux_8khz:
|
401 |
+
latest:
|
402 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
403 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
404 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
|
405 |
+
sample_rate: 8000
|
406 |
+
v1:
|
407 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
408 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
409 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
|
410 |
+
sample_rate: 8000
|
411 |
+
tux_16khz:
|
412 |
+
latest:
|
413 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
414 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
415 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
|
416 |
+
sample_rate: 16000
|
417 |
+
v1:
|
418 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
419 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
420 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
|
421 |
+
sample_rate: 16000
|
422 |
+
fr:
|
423 |
+
v3_fr:
|
424 |
+
latest:
|
425 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
426 |
+
package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
|
427 |
+
sample_rate: [8000, 24000, 48000]
|
428 |
+
gilles_v2:
|
429 |
+
latest:
|
430 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
431 |
+
package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
|
432 |
+
sample_rate: [8000, 16000]
|
433 |
+
gilles_8khz:
|
434 |
+
latest:
|
435 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
436 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
437 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
|
438 |
+
sample_rate: 8000
|
439 |
+
v1:
|
440 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
441 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
442 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
|
443 |
+
sample_rate: 8000
|
444 |
+
gilles_16khz:
|
445 |
+
latest:
|
446 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
447 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
448 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
|
449 |
+
sample_rate: 16000
|
450 |
+
v1:
|
451 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
452 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
453 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
|
454 |
+
sample_rate: 16000
|
455 |
+
ba:
|
456 |
+
aigul_v2:
|
457 |
+
latest:
|
458 |
+
example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
|
459 |
+
package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
|
460 |
+
sample_rate: [8000, 16000]
|
461 |
+
language_name: 'bashkir'
|
462 |
+
xal:
|
463 |
+
v3_xal:
|
464 |
+
latest:
|
465 |
+
example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
|
466 |
+
package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
|
467 |
+
sample_rate: [8000, 24000, 48000]
|
468 |
+
erdni_v2:
|
469 |
+
latest:
|
470 |
+
example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
|
471 |
+
package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
|
472 |
+
sample_rate: [8000, 16000]
|
473 |
+
language_name: 'kalmyk'
|
474 |
+
tt:
|
475 |
+
v3_tt:
|
476 |
+
latest:
|
477 |
+
example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
|
478 |
+
package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
|
479 |
+
sample_rate: [8000, 24000, 48000]
|
480 |
+
dilyara_v2:
|
481 |
+
latest:
|
482 |
+
example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
|
483 |
+
package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
|
484 |
+
sample_rate: [8000, 16000]
|
485 |
+
language_name: 'tatar'
|
486 |
+
uz:
|
487 |
+
v3_uz:
|
488 |
+
latest:
|
489 |
+
example: 'Tanishganimdan xursandman.'
|
490 |
+
package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
|
491 |
+
sample_rate: [8000, 24000, 48000]
|
492 |
+
dilnavoz_v2:
|
493 |
+
latest:
|
494 |
+
example: 'Tanishganimdan xursandman.'
|
495 |
+
package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
|
496 |
+
sample_rate: [8000, 16000]
|
497 |
+
language_name: 'uzbek'
|
498 |
+
ua:
|
499 |
+
v3_ua:
|
500 |
+
latest:
|
501 |
+
example: 'К+отики - пухн+асті жив+отики.'
|
502 |
+
package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
|
503 |
+
sample_rate: [8000, 24000, 48000]
|
504 |
+
mykyta_v2:
|
505 |
+
latest:
|
506 |
+
example: 'К+отики - пухн+асті жив+отики.'
|
507 |
+
package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
|
508 |
+
sample_rate: [8000, 24000, 48000]
|
509 |
+
language_name: 'ukrainian'
|
510 |
+
indic:
|
511 |
+
v3_indic:
|
512 |
+
latest:
|
513 |
+
example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
|
514 |
+
package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
|
515 |
+
sample_rate: [8000, 24000, 48000]
|
516 |
+
multi:
|
517 |
+
multi_v2:
|
518 |
+
latest:
|
519 |
+
package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
|
520 |
+
sample_rate: [8000, 16000]
|
521 |
+
speakers:
|
522 |
+
aidar:
|
523 |
+
lang: 'ru'
|
524 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
525 |
+
baya:
|
526 |
+
lang: 'ru'
|
527 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
528 |
+
kseniya:
|
529 |
+
lang: 'ru'
|
530 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
531 |
+
irina:
|
532 |
+
lang: 'ru'
|
533 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
534 |
+
ruslan:
|
535 |
+
lang: 'ru'
|
536 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
537 |
+
natasha:
|
538 |
+
lang: 'ru'
|
539 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
540 |
+
thorsten:
|
541 |
+
lang: 'de'
|
542 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
543 |
+
tux:
|
544 |
+
lang: 'es'
|
545 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
546 |
+
gilles:
|
547 |
+
lang: 'fr'
|
548 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
549 |
+
lj:
|
550 |
+
lang: 'en'
|
551 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
552 |
+
dilyara:
|
553 |
+
lang: 'tt'
|
554 |
+
example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
|
555 |
+
te_models:
|
556 |
+
latest:
|
557 |
+
package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
|
558 |
+
languages: ['en', 'de', 'ru', 'es']
|
559 |
+
punct: '.,-!?—'
|
560 |
+
v2:
|
561 |
+
package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
|
562 |
+
languages: ['en', 'de', 'ru', 'es']
|
563 |
+
punct: '.,-!?—'
|
requirements.txt
CHANGED
@@ -6,6 +6,8 @@ scikit-learn
|
|
6 |
tensorboardX
|
7 |
sentencepiece # summaruization
|
8 |
autocorrect # spelling
|
|
|
|
|
9 |
# pip install git+https://github.com/RussianNLP/russian_paraphrasers@master
|
10 |
#pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
|
11 |
#pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
|
|
|
6 |
tensorboardX
|
7 |
sentencepiece # summaruization
|
8 |
autocorrect # spelling
|
9 |
+
normalizer
|
10 |
+
textwrap3 #??
|
11 |
# pip install git+https://github.com/RussianNLP/russian_paraphrasers@master
|
12 |
#pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
|
13 |
#pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
|