a Df`/@s(ddlZddlZddlmZddlZddlmZddlmZddlZddl Z ddl Z ddZ ddZ d d Z ddlZd d Zd dZddZedkr$eZejdeddejdeddejdeddejdeddejdeddejdegddejd egd!deZeedS)"N)ArgumentParser)ThreadPoolExecutor)tqdmc Cs.g}d}d}t}tj|d}t|d}||dWdn1sP0Yd|}d|}zdtj|ddtj tj d} | j d } | j d } t | } d | vr|d 7}|| | d d Wn<tjy } z |t| ddWYd} ~ n d} ~ 00|d 7}|||d}||dS)Nrz test.leanwcmd"{"path": "%s", "allTactics": true}echo '%s' | lake exe replTshellcheckstdoutstderrutf-8messagespassr r statusnopasserrorrdresults pass_rate)tempfile gettempdirospathjoinopenwrite subprocessrunPIPEr decoder jsonloadskeysappendCalledProcessErrorstr)itemrpassedtotaltemp_dir temp_filefdatacommandresultr r json_stdouterr7-/opt/tiger/repl/openllm_pass_rate_new_test.pywrapped_function s, ,    *r9c Csfg}d}d}t|D]}tdddd}||dWdn1sJ0Yd}d|}zdtj|d d tjtjd } | jd} t | } d | vr|d 7}| j d} | | | d dWn:tj y} z | t| ddWYd} ~ n d} ~ 00|d 7}q||d}t|tdd(}tj||d|dddWdn1sX0YdS)Nrztest/test.leanrrencodingrz.{"path": "test/test.lean", "allTactics": true}rTr rrrrrrrz results.jsonrFindent ensure_ascii)rr r!r"r#r$r r%r&r'r(r r)r*r+printdump) command_listargsrr-r.r,r1r2r3r4r r5r r6rr7r7r8single7s< ,        rDcs:g}d}d}ddt|}tddNfddt|D}t||dd D]}|}||qNWdn1sz0Yd d } | ||\} } td t|td | td|d| d|} tjtj | ddt | d4} t j d|d| d|| i| dddWdn1s,0YdS)Nrc SsVd}dd}||}g|d<t|dD]&\}}tj||d|d}t|d}||Wdn1sv0Yd |} d | d } z}zt|dd}WYd}~n d}~00d| vrbt| sb| | dd}nt| s&d| vr&d}| dD]}|ddkrd}|ddd}|ddd}t|dddD]0}t|d|}||d7}|sqq| | d|d }qq|sB| | dd}nt| s4J| | ddd }|d|q(|S)!Nz /opt/jianqiaocSs*i}|D]}|dvr ||||<q |S)N)questionanswer total outputr)r() json_data filtered_datakeyr7r7r8 filter_jsonis  z3multi..execute_command..filter_jsonrr_test_z.leanrrzecho 'z' | lake exe replTiX)r r timeoutr r r nopass_limitr nopass_errorrrrrseverityrrposlinecolumn r)r r r string_pos) enumeraterrrr r!r"r#r$r&r'r r%r TimeoutExpiredr+r*lenrangesplitr))r,indexr/rK result_dictirr0r1r2r3r4r r r6 result_itemflagme start_linecurrent_columnline_nline_lenr7r7r8execute_commandgsP ( $&    zmulti..execute_command) max_workerscsg|]\}}||qSr7)submit).0r^rrfexecutorr7r8 zmulti..zProcessing Commands)r.descc Ssd}d}|D]}|dg}|r ttdt|D]"}||ddkr2|d7}qVq2tt|t|D]"}||ddkrh|d7}q qhq |r|t|nd}|r|t|nd}||fS)Nrrrrr)getrZminrY) result_listk pass_1_count pass_k_countr4rjpass_1pass_kr7r7r8calculate_passs  zmulti..calculate_passz total len:zPass@1:zPass@:zpass_rate_results/T)exist_okrrrwpass_r<Fr=)rYrrWrr4r)r@rmakedirsrdirnamer r&rA)rB output_pathrsrr-r.futuresfuturer4ryrwrx output_filer1r7rkr8multics&9 *  rcCsd}t|d|S)Nz@\[simp\s*.*?\]$)resub)spatternr7r7r8remove_simp_pattern_from_endsrcs`tfdddDrjz"d}t||tj}d|}Wn.td|tj}t|rbd|}Yn0ntfdddDrz"d }t||tj}d|}Wn.td|tj}t|rd|}Yn0nvtfd dd DrBz"d }t||tj}d|}Wn0td|tj}t|r:d|}Yn0ntd t|s\d }|S)Nc3s|]}|vVqdSNr7rjx input_pathr7r8 rnzget_lean..)zdeepseek-math-7b-instructzdeepseek-math-7b-baseZ llemma_34bZ llemma_7bz```lean4\s*\n(.*?)\n``` z ```(.*?)```c3s|]}|vVqdSrr7rrr7r8rrn)zinternlm2-mathz```lean\s*\n(.*?)\n```c3s|]}|vVqdSrr7rrr7r8rrn)zMistral-7B-Instruct-v0.2znot implmementedz&theorem h : f + g = 39 := by exact rfl)anyrfindallDOTALLrrYNotImplementedErrorstrip)textrZcode_block_patternZ code_blockscontentmatchesr7rr8get_leans> rc Cs8g}tj|jd}t|D]}t|ddd}|D]}zt|}|dd}g|d<|ddt |j t |dD]D}d |jvr| d d }t ||j} |dd || gq|dd |d<Wnd dl} | Yn0||qs<   ,,m.