import pdb | |
import subprocess | |
import re | |
# File that collects the log output of this script.
output_file = "pass_rate_output.txt"

# Start from an empty log: opening in "w" mode truncates any existing content.
with open(output_file, "w"):
    pass

# Input paths to process.
input_path_lists = ["test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/"]
def get_output(input_string, k):
    """Derive the relative output file path for one input path.

    The input is searched for ``zero_shot/<first>/<middle>/<last>``, where
    ``<first>`` and ``<last>`` are runs of word characters. The result is
    ``"<first>/<last>pass<k>.jsonl"``; for example
    ``"test/zero_shot/wild_test/generation/lean4_rft/1/1/"`` with ``k=5``
    yields ``"wild_test/lean4_rftpass5.jsonl"``.

    Args:
        input_string: Path expected to contain a ``zero_shot/...`` portion.
        k: Value embedded in the file name after ``pass``.

    Returns:
        The derived path as a string. It is also printed to stdout.

    Raises:
        ValueError: If ``input_string`` does not contain the expected layout.
    """
    match = re.search(r"zero_shot/(\w+)/(.+?)/(\w+)", input_string)
    if match is None:
        # The previous version printed a message and then fell through to
        # `return result` with `result` unbound, which surfaced as an
        # UnboundLocalError. Fail with an explicit, descriptive error instead.
        raise ValueError(
            f"No match found: {input_string!r} does not contain "
            "'zero_shot/<dir>/<dir>/<dir>'"
        )

    # Group 2 (the middle component) only anchors the pattern; it is not used.
    result = "/".join([match.group(1), match.group(3) + f"pass{k}.jsonl"])
    print(result)
    return result
# Additional input paths, currently all disabled; uncomment an entry to add it
# to the run.
# NOTE: this extends the list defined earlier in the file instead of rebinding
# the name. With every entry commented out, rebinding left the list empty, so
# the loop below never ran.
input_path_lists.extend([
    # "../auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all/2/10/",
    # "../auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all/2/10/",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1",
    # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all/2/20/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/5/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/5/",
    # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_5k/2/1/",
    # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
    # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/1/",
    # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/1/",
    # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
    # "test/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/1/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/3/1/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all_mathrft/2/10/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
    # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
    # Add more input paths as needed
])
# Run the matching pass-rate script for every input path and append each
# script's stdout and stderr to output_file.
k = 5  # forwarded as --k and embedded in the derived output file name

# One append-mode handle is shared by this script and the child processes, and
# is closed deterministically when the block exits. Previously a new handle
# was opened for every write and never explicitly closed.
with open(output_file, "a") as log_file:
    for input_path in input_path_lists:
        # Paths containing one of these markers are scored with
        # pass_rate_notlean_test.py; all others use pass_rate_new_test.py.
        if any(
            marker in input_path
            for marker in ("wild_test", "gsm8k_train", "math_train")
        ):
            label, script = "wild", "pass_rate_notlean_test.py"
        else:
            label, script = "lean", "pass_rate_new_test.py"
        print(label)

        # Flush so the header reaches the file before the child process starts
        # writing through the same file descriptor.
        print(f"Running for input path: {input_path}", file=log_file, flush=True)

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters from being re-interpreted.
        command = [
            "python3",
            script,
            "--input_path",
            input_path,
            "--output_path",
            get_output(input_path, k),
            "--k",
            str(k),
        ]
        subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT)
        print("\n\n", file=log_file, flush=True)