a Σc3:@sddlZddlZddlZddlZddlmZddlmZddl m Z ddl m Z ddl m Z GdddejZGd d d ejZGd d d ejZGd ddejZdS)N)nn) functional)commons)modules) LayerNormcs&eZdZdfdd ZddZZS) Encoder c st||_||_||_||_||_||_||_t ||_ t |_ t |_t |_t |_t|jD]X} |j t|||||d|jt||jt|||||d|jt|qrdS)N) p_dropout window_sizer )super__init__hidden_channelsfilter_channelsn_headsn_layers kernel_sizer r rDropoutdrop ModuleList attn_layers norm_layers_1 ffn_layers norm_layers_2rangeappendMultiHeadAttentionrFFN) selfrrrrrr r kwargsi __class__LE:\codes\py39\py39vits_vc\runtime\lib\site-packages\infer_pack\attentions.pyrsD        zEncoder.__init__cCs|d|d}||}t|jD]^}|j||||}||}|j|||}|j|||}||}|j|||}q&||}|S)N) unsqueezerrrrrrr)r xx_mask attn_maskr"yr%r%r&forward=s  zEncoder.forward)rr r __name__ __module__ __qualname__rr. __classcell__r%r%r#r&r s /rcs&eZdZd fdd ZddZZS) Decoderrr FTc st||_||_||_||_||_||_||_||_ t ||_ t |_t |_t |_t |_t |_t |_t|jD]} |jt||||||d|jt||jt||||d|jt||jt|||||dd|jt|qdS)N)r proximal_bias proximal_initr T)r causal)rrrrrrrr r5r6rrrrself_attn_layers norm_layers_0encdec_attn_layersrrrrrrrr) r rrrrrr r5r6r!r"r#r%r&rMsZ          zDecoder.__init__c Cst|dj|j|jd}|d|d}||}t|jD]}|j ||||}| |}|j |||}|j ||||}| |}|j |||}|j|||}| |}|j|||}qD||}|S)z< x: decoder input h: encoder output r'devicedtyper()rsubsequent_masksizetor<r=r)rrr8rr9r:rrr) r r*r+hZh_maskZself_attn_maskZencdec_attn_maskr"r-r%r%r&r.s"   zDecoder.forward)rr FTr/r%r%r#r&r4Ls ;r4csbeZdZdfdd ZdddZdd d Zd d Zd dZddZddZ ddZ ddZ Z S)rr NTFc st||dksJ||_||_||_||_||_||_||_||_ | |_ d|_ |||_ t ||d|_t ||d|_t ||d|_t ||d|_t ||_|dur|rdn|} |j d} t t| |dd|j | |_t t| |dd|j | |_t j|jjt j|jjt j|jj| rt4|jj|jj|jj|jjWdn1s0YdS)Nrrgr') rrchannels out_channelsrr r heads_share block_lengthr5r6attn k_channelsrConv1dconv_qconv_kconv_vconv_orr Parametertorchrandn emb_rel_k emb_rel_vinitxavier_uniform_weightno_gradcopy_bias) r rBrCrr r rDrEr5r6Z n_heads_relZ rel_stddevr#r%r&rsL       zMultiHeadAttention.__init__cCsD||}||}||}|j||||d\}|_||}|S)N)mask)rIrJrK attentionrFrL)r r*cr,qkvr%r%r&r.s     zMultiHeadAttention.forwardcCsg||dR\}}}}|||j|j|dd}|||j|j|dd}|||j|j|dd}t|t|j|dd} |j dur||ksJd| |j |} | |t|j| } | | } | | } |jr||ksJd| ||j| j| jd} |dur~| |dkd } |jdur~||ksRJd t| |j |j} | | dkd } tj| dd }||}t||}|j dur||}| |j|}||||}|dd|||}||fS) Nr'r(z8Relative attention is only available for self-attention.z3Proximal bias is only available for self-attention.r;rgz5Local attention is only available for self-attention.)dim)r?viewrrG transposerNmatmulmathsqrtr _get_relative_embeddingsrP_matmul_with_relative_keys'_relative_position_to_absolute_positionr5_attention_bias_proximalr@r<r= masked_fillrE ones_liketriutrilFsoftmaxr'_absolute_position_to_relative_positionrQ_matmul_with_relative_values contiguous)r querykeyvaluerXbdZt_sZt_tscoresZkey_relative_embeddingsZ rel_logitsZ scores_local block_maskZp_attnoutputZrelative_weightsZvalue_relative_embeddingsr%r%r&rYsb            zMultiHeadAttention.attentioncCst||d}|S)zU x: [b, h, l, m] y: [h or 1, m, d] ret: [b, h, l, d] r)rNrcr)r r*r-retr%r%r&rqsz/MultiHeadAttention._matmul_with_relative_valuescCst||ddd}|S)zU x: [b, h, l, d] y: [h or 1, m, d] ret: [b, h, l, m] rr_r()rNrcr)rbr{r%r%r&rgsz-MultiHeadAttention._matmul_with_relative_keysc Csd|jd}t||jdd}t|jd|d}|d|d}|dkrtt|tddg||gddgg}n|}|dd||f}|S)Nr'rr)r maxrnpadrconvert_pad_shape) r Zrelative_embeddingslengthZmax_relative_positionZ pad_lengthZslice_start_positionZslice_end_positionZpadded_relative_embeddingsZused_relative_embeddingsr%r%r&rf'sz+MultiHeadAttention._get_relative_embeddingsc Cs|\}}}}t|tddgddgddgddgg}||||d|g}t|tddgddgd|dgg}||||dd|dgddddd||ddf}|S)z? x: [b, h, l, 2*l-1] ret: [b, h, l, l] rrr'Nr?rnr~rrrar r*batchZheadsr_Zx_flatZx_finalr%r%r&rh9s* z:MultiHeadAttention._relative_position_to_absolute_positionc Cs|\}}}}t|tddgddgddgd|dgg}||||d||dg}t|tddgddg|dgg}||||d|gddddddddf}|S)z? x: [b, h, l, l] ret: [b, h, l, 2*l-1] rrr'Nrrr%r%r&rpNs& $2z:MultiHeadAttention._absolute_position_to_relative_positionc CsJtj|tjd}t|dt|d}tttt| ddS)zBias for self-attention to encourage attention to close positions. Args: length: an integer scalar. Returns: a Tensor with shape [1, 1, length, length] )r=rr)rNarangefloat32r)log1pabs)r rrdiffr%r%r&ri^sz+MultiHeadAttention._attention_bias_proximal)r NTNFF)N)N) r0r1r2rr.rYrqrgrfrhrprir3r%r%r#r&rs5 3  rcs6eZdZd fdd ZddZdd Zd d ZZS) rr NFcszt||_||_||_||_||_||_||_|rB|j |_ n|j |_ t ||||_t ||||_t ||_dS)N)rr in_channelsrCrrr activationr7_causal_paddingpadding _same_paddingrrHconv_1conv_2rr)r rrCrrr rr7r#r%r&rks  z FFN.__init__cCsb||||}|jdkr2|td|}n t|}||}||||}||S)NgelugZd;?)rrrrNsigmoidrelurr)r r*r+r%r%r&r.s   z FFN.forwardcCsH|jdkr|S|jd}d}ddgddg||gg}t|t|}|S)Nrrrrnr~rrr r*pad_lpad_rrr%r%r&rs  zFFN._causal_paddingcCsR|jdkr|S|jdd}|jd}ddgddg||gg}t|t|}|S)Nrr'rrrr%r%r&rs  zFFN._same_padding)r NF)r0r1r2rr.rrr3r%r%r#r&rjs  r)copyrdnumpynprNrtorch.nnrrn infer_packrrinfer_pack.modulesrModulerr4rrr%r%r%r&s     ?VI