Commit · 6c30dfb
1 Parent(s): da14a06
model improved
Browse files
- maker.py +8 -2
- pytorch_model.bin +1 -1
maker.py
CHANGED
@@ -16,7 +16,7 @@ class UDEmbedsDataset(object):
|
|
16 |
self.tokenizer=tokenizer
|
17 |
self.embeddings=embeddings
|
18 |
self.seeks=[0]
|
19 |
-
label=set(["SYM","SYM."])
|
20 |
dep=set()
|
21 |
s=self.conllu.readline()
|
22 |
while s!="":
|
@@ -66,8 +66,9 @@ class UDEmbedsDataset(object):
|
|
66 |
emb=self.embeddings
|
67 |
else:
|
68 |
import torch
|
69 |
-
if len(x)<
|
70 |
x=[True]*len(x)
|
|
|
71 |
else:
|
72 |
w=sum([len(x)-i+1 if b else 0 for i,b in enumerate(x)])+1
|
73 |
for i in range(len(x)):
|
@@ -84,6 +85,11 @@ class UDEmbedsDataset(object):
|
|
84 |
for j in range(i+1,len(x)):
|
85 |
ids.append(j)
|
86 |
upos.append(p[j]+"|"+d[j] if int(c[j][6])==i+1 else p[i]+"|"+d[i] if int(c[i][6])==j+1 else p[j]+"|_")
|
|
|
|
|
|
|
|
|
|
|
87 |
ids.append(-1)
|
88 |
upos.append("SYM|_")
|
89 |
with torch.no_grad():
|
|
|
16 |
self.tokenizer=tokenizer
|
17 |
self.embeddings=embeddings
|
18 |
self.seeks=[0]
|
19 |
+
label=set(["SYM","SYM.","SYM|_"])
|
20 |
dep=set()
|
21 |
s=self.conllu.readline()
|
22 |
while s!="":
|
|
|
66 |
emb=self.embeddings
|
67 |
else:
|
68 |
import torch
|
69 |
+
if len(x)<127:
|
70 |
x=[True]*len(x)
|
71 |
+
w=(len(x)+2)*(len(x)+1)/2
|
72 |
else:
|
73 |
w=sum([len(x)-i+1 if b else 0 for i,b in enumerate(x)])+1
|
74 |
for i in range(len(x)):
|
|
|
85 |
for j in range(i+1,len(x)):
|
86 |
ids.append(j)
|
87 |
upos.append(p[j]+"|"+d[j] if int(c[j][6])==i+1 else p[i]+"|"+d[i] if int(c[i][6])==j+1 else p[j]+"|_")
|
88 |
+
if w>8192 and i>0:
|
89 |
+
while w>8192 and upos[-1].endswith("|_"):
|
90 |
+
upos.pop(-1)
|
91 |
+
ids.pop(-1)
|
92 |
+
w-=1
|
93 |
ids.append(-1)
|
94 |
upos.append("SYM|_")
|
95 |
with torch.no_grad():
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1392836610
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c73d3e7a178593b540a253d77587a8c8aaa459c1a4e621d5f20a40835a25785
|
3 |
size 1392836610
|