Shangming Cai committed
Commit 08c8530 · 1 Parent(s): 4690395

Pull updates from branch 'main' of https://huggingface.co/Qwen/Qwen-7B-Chat-Int4.

Files changed (3)
  1. README.md +1 -1
  2. assets/wechat.png +0 -0
  3. modeling_qwen.py +2 -4
README.md CHANGED
@@ -18,7 +18,7 @@ inference: false
  <p align="center">
  🤗 <a href="https://huggingface.co/Qwen">Hugging Face</a>&nbsp&nbsp | &nbsp&nbsp🤖 <a href="https://modelscope.cn/organization/qwen">ModelScope</a>&nbsp&nbsp | &nbsp&nbsp 📑 <a href="https://arxiv.org/abs/2309.16609">Paper</a> &nbsp&nbsp | &nbsp&nbsp🖥️ <a href="https://modelscope.cn/studios/qwen/Qwen-7B-Chat-Demo/summary">Demo</a>
  <br>
- <a href="assets/wechat.png">WeChat (微信)</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://discord.gg/z3GAxXZ9Ce">Discord</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://dashscope.aliyun.com">API</a>
+ <a href="https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png">WeChat (微信)</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://discord.gg/z3GAxXZ9Ce">Discord</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://dashscope.aliyun.com">API</a>
  </p>
  <br>
assets/wechat.png CHANGED
modeling_qwen.py CHANGED
@@ -540,9 +540,7 @@ class QWenAttention(nn.Module):
 
          if not self.use_cache_quantization and SUPPORT_TORCH2:
              if attention_mask is not None:
-                 attention_mask = attention_mask.expand(
-                     -1, -1, causal_mask.size(2), -1
-                 )
+                 attention_mask = attention_mask.expand(-1, -1, query.size(2), -1)
                  if causal_mask is not None:
                      attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)
              else:
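The hunk above changes the expansion target of the padding mask from causal_mask.size(2) to query.size(2), so the mask is no longer dereferenced before the `if causal_mask is not None` check (the causal mask can be None, e.g. during single-token decoding). Below is a minimal sketch, not the Qwen implementation, of expanding a padding mask to the query length and merging it with an optional causal mask before PyTorch 2's scaled_dot_product_attention; all tensor shapes and variable names are illustrative assumptions.

```python
# Minimal sketch (illustrative, not modeling_qwen.py): expand a padding mask to
# the query length and merge it with an optional causal mask before SDPA.
import torch
import torch.nn.functional as F

batch, n_head, q_len, kv_len, head_dim = 2, 4, 1, 8, 16
query = torch.randn(batch, n_head, q_len, head_dim)
key = torch.randn(batch, n_head, kv_len, head_dim)
value = torch.randn(batch, n_head, kv_len, head_dim)

# Additive padding mask: 0 where attention is allowed, a large negative value
# where the key position is padding; shape (batch, 1, 1, kv_len).
attention_mask = torch.zeros(batch, 1, 1, kv_len)
attention_mask[0, :, :, -2:] = torch.finfo(query.dtype).min

# During single-token decoding (q_len == 1) the causal mask may be None, so the
# expansion must not read causal_mask.size(2); query.size(2) is always defined.
causal_mask = None
if q_len > 1:
    causal_mask = torch.tril(
        torch.ones(q_len, kv_len, dtype=torch.bool), diagonal=kv_len - q_len
    ).view(1, 1, q_len, kv_len)

attention_mask = attention_mask.expand(-1, -1, query.size(2), -1)
if causal_mask is not None:
    attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)

out = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask)
print(out.shape)  # torch.Size([2, 4, 1, 16])
```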
@@ -1356,7 +1354,7 @@ def apply_rotary_pos_emb(t, freqs):
      t (tensor(batch_size, seq_len, n_head, head_dim)):
          the input embedding/hidden states
      freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
-         the cached cos/sin position embeddings
+         the cached cos/sin position embeddings
      """
      rot_dim = freqs[0].shape[-1]
      cos, sin = freqs
 
1354
  t (tensor(batch_size, seq_len, n_head, head_dim)):
1355
  the input embedding/hidden states
1356
  freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
1357
+ the cached cos/sin position embeddings
1358
  """
1359
  rot_dim = freqs[0].shape[-1]
1360
  cos, sin = freqs
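For reference, the docstring above describes t as (batch_size, seq_len, n_head, head_dim) and freqs as cached cos/sin tensors of shape (1, seq_len, 1, rotary_dim). The following is a minimal sketch of a rotate-half RoPE application with those shapes; it is an illustrative assumption, not a copy of the modeling_qwen.py implementation.

```python
# Minimal sketch (illustrative): rotate-half rotary position embedding applied
# to the first rot_dim channels of t, with shapes as in the docstring above.
import torch

def rotate_half(x):
    # Split the last dimension in half and rotate the pairs: (x1, x2) -> (-x2, x1).
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def apply_rotary_pos_emb_sketch(t, freqs):
    # t: (batch_size, seq_len, n_head, head_dim)
    # freqs: [cos (1, seq_len, 1, rotary_dim), sin (1, seq_len, 1, rotary_dim)]
    rot_dim = freqs[0].shape[-1]
    cos, sin = freqs
    # Only the first rot_dim channels are rotated; the rest pass through unchanged.
    t_rot, t_pass = t[..., :rot_dim], t[..., rot_dim:]
    t_rot = (t_rot * cos) + (rotate_half(t_rot) * sin)
    return torch.cat((t_rot, t_pass), dim=-1)

# Tiny usage example with head_dim == rotary_dim == 32.
t = torch.randn(2, 6, 4, 32)
pos = torch.arange(6, dtype=torch.float32)
inv_freq = 1.0 / (10000 ** (torch.arange(0, 32, 2, dtype=torch.float32) / 32))
angles = torch.outer(pos, inv_freq)                 # (seq_len, rotary_dim // 2)
emb = torch.cat((angles, angles), dim=-1)           # (seq_len, rotary_dim)
freqs = [emb.cos()[None, :, None, :], emb.sin()[None, :, None, :]]
print(apply_rotary_pos_emb_sketch(t, freqs).shape)  # torch.Size([2, 6, 4, 32])
```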