KoichiYasuoka committed on
Commit
6c30dfb
·
1 Parent(s): da14a06

model improved

Browse files
Files changed (2) hide show
  1. maker.py +8 -2
  2. pytorch_model.bin +1 -1
maker.py CHANGED
@@ -16,7 +16,7 @@ class UDEmbedsDataset(object):
16
  self.tokenizer=tokenizer
17
  self.embeddings=embeddings
18
  self.seeks=[0]
19
- label=set(["SYM","SYM."])
20
  dep=set()
21
  s=self.conllu.readline()
22
  while s!="":
@@ -66,8 +66,9 @@ class UDEmbedsDataset(object):
66
  emb=self.embeddings
67
  else:
68
  import torch
69
- if len(x)<128:
70
  x=[True]*len(x)
 
71
  else:
72
  w=sum([len(x)-i+1 if b else 0 for i,b in enumerate(x)])+1
73
  for i in range(len(x)):
@@ -84,6 +85,11 @@ class UDEmbedsDataset(object):
84
  for j in range(i+1,len(x)):
85
  ids.append(j)
86
  upos.append(p[j]+"|"+d[j] if int(c[j][6])==i+1 else p[i]+"|"+d[i] if int(c[i][6])==j+1 else p[j]+"|_")
 
 
 
 
 
87
  ids.append(-1)
88
  upos.append("SYM|_")
89
  with torch.no_grad():
 
16
  self.tokenizer=tokenizer
17
  self.embeddings=embeddings
18
  self.seeks=[0]
19
+ label=set(["SYM","SYM.","SYM|_"])
20
  dep=set()
21
  s=self.conllu.readline()
22
  while s!="":
 
66
  emb=self.embeddings
67
  else:
68
  import torch
69
+ if len(x)<127:
70
  x=[True]*len(x)
71
+ w=(len(x)+2)*(len(x)+1)/2
72
  else:
73
  w=sum([len(x)-i+1 if b else 0 for i,b in enumerate(x)])+1
74
  for i in range(len(x)):
 
85
  for j in range(i+1,len(x)):
86
  ids.append(j)
87
  upos.append(p[j]+"|"+d[j] if int(c[j][6])==i+1 else p[i]+"|"+d[i] if int(c[i][6])==j+1 else p[j]+"|_")
88
+ if w>8192 and i>0:
89
+ while w>8192 and upos[-1].endswith("|_"):
90
+ upos.pop(-1)
91
+ ids.pop(-1)
92
+ w-=1
93
  ids.append(-1)
94
  upos.append("SYM|_")
95
  with torch.no_grad():
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:844fe48fcb5c513dd8c954d8d4d546d4fa8f532babfa99dc4e2ec6feabce90b1
3
  size 1392836610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c73d3e7a178593b540a253d77587a8c8aaa459c1a4e621d5f20a40835a25785
3
  size 1392836610