Shangming Cai committed
Commit: 08c8530
Parent(s): 4690395

Pull updates from branch 'main' of https://huggingface.co/Qwen/Qwen-7B-Chat-Int4.

Files changed:
- README.md (+1 -1)
- assets/wechat.png (+0 -0)
- modeling_qwen.py (+2 -4)
README.md
CHANGED
@@ -18,7 +18,7 @@ inference: false
 <p align="center">
         🤗 <a href="https://huggingface.co/Qwen">Hugging Face</a>&nbsp&nbsp | &nbsp&nbsp🤖 <a href="https://modelscope.cn/organization/qwen">ModelScope</a>&nbsp&nbsp | &nbsp&nbsp 📑 <a href="https://arxiv.org/abs/2309.16609">Paper</a> &nbsp&nbsp | &nbsp&nbsp🖥️ <a href="https://modelscope.cn/studios/qwen/Qwen-7B-Chat-Demo/summary">Demo</a>
 <br>
-<a href="assets/wechat.png">WeChat (微信)</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://discord.gg/z3GAxXZ9Ce">Discord</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://dashscope.aliyun.com">API</a>
+<a href="https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png">WeChat (微信)</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://discord.gg/z3GAxXZ9Ce">Discord</a>&nbsp&nbsp | &nbsp&nbsp<a href="https://dashscope.aliyun.com">API</a>
 </p>
 <br>
assets/wechat.png
CHANGED
modeling_qwen.py
CHANGED
@@ -540,9 +540,7 @@ class QWenAttention(nn.Module):
 
         if not self.use_cache_quantization and SUPPORT_TORCH2:
             if attention_mask is not None:
-                attention_mask = attention_mask.expand(
-                    -1, -1, causal_mask.size(2), -1
-                )
+                attention_mask = attention_mask.expand(-1, -1, query.size(2), -1)
                 if causal_mask is not None:
                     attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)
             else:
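The removed expansion read the query length from `causal_mask`, which is only checked for `None` on the following line; the added form takes the same length from `query` directly. A minimal PyTorch sketch of the shape handling around this hunk, with all tensor sizes invented for illustration (they are not taken from the model):

```python
import torch

# Illustrative sizes only; the real layer derives these from its inputs.
batch, n_head, q_len, kv_len, head_dim = 2, 4, 5, 5, 64

query = torch.randn(batch, n_head, q_len, head_dim)   # stand-in for the query states
attention_mask = torch.zeros(batch, 1, 1, kv_len)     # additive padding mask, broadcastable
causal_mask = torch.tril(
    torch.ones(q_len, kv_len, dtype=torch.bool)
).view(1, 1, q_len, kv_len)

# Expand the padding mask along the query axis using the query itself,
# so the expansion no longer depends on causal_mask being present.
attention_mask = attention_mask.expand(-1, -1, query.size(2), -1)
if causal_mask is not None:
    attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)

print(attention_mask.shape)  # torch.Size([2, 1, 5, 5])
```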
@@ -1356,7 +1354,7 @@ def apply_rotary_pos_emb(t, freqs):
         t (tensor(batch_size, seq_len, n_head, head_dim)):
             the input embedding/hidden states
         freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
-        the cached cos/sin position embeddings
+            the cached cos/sin position embeddings
     """
     rot_dim = freqs[0].shape[-1]
     cos, sin = freqs
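This hunk only re-indents the description of `freqs`, but the docstring it touches documents the shapes `apply_rotary_pos_emb` expects. A rough, generic rotary-embedding sketch built around those documented shapes; the cos/sin table construction and `rotate_half` below are illustrative assumptions, not the file's implementation:

```python
import torch

# Sizes invented to match the documented shapes.
batch_size, seq_len, n_head, head_dim, rotary_dim = 1, 8, 2, 16, 8

t = torch.randn(batch_size, seq_len, n_head, head_dim)   # input hidden states

# Cached cos/sin tables shaped (1, seq_len, 1, rotary_dim), as the docstring describes.
pos = torch.arange(seq_len, dtype=torch.float32)
inv_freq = 1.0 / (10000.0 ** (torch.arange(0, rotary_dim, 2, dtype=torch.float32) / rotary_dim))
angles = torch.outer(pos, inv_freq)                       # (seq_len, rotary_dim / 2)
angles = torch.cat([angles, angles], dim=-1)              # (seq_len, rotary_dim)
freqs = [angles.cos()[None, :, None, :], angles.sin()[None, :, None, :]]

def rotate_half(x):
    # Swap the two feature halves and negate one of them.
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat([-x2, x1], dim=-1)

# Rotate the first rotary_dim features, pass the remainder through unchanged.
rot_dim = freqs[0].shape[-1]
cos, sin = freqs
t_rot, t_pass = t[..., :rot_dim], t[..., rot_dim:]
t_out = torch.cat([t_rot * cos + rotate_half(t_rot) * sin, t_pass], dim=-1)

print(t_out.shape)  # torch.Size([1, 8, 2, 16])
```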