sft_mjschock-chat_threads / tokenizer_config.json
mjschock's picture
Training in progress, step 3
f20d0a7 verified
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<s>",
"chat_template": "{% for message in messages %}<|im_start|>{{ message.role }}\n{% if message.role == 'system' %}{{ message.content }}{% if tools and tools | length > 0 %}\n\nYou have access to the following functions:\n{% for tool in tools %}\nfunctions.{{ tool.function.name }}:\n{{ tool.function.parameters | tojson }}\n{% endfor %}\n\nYou can respond to users messages with either a single message or one or more function calls.\n\nTo respond with a message begin the message with 'message:', use the following format:\n\nmessage:\n<message>\n\nTo respond with one or more function calls begin the message with 'functions.<function_name>:', use the following format:\n\nfunctions.<function_name>:\n{ \"arg1\": \"value1\", \"arg2\": \"value2\" }\nfunctions.<function_name>:\n{ \"arg1\": \"value1\", \"arg2\": \"value2\" }{% endif %}<|im_end|>\n{% endif %}{% if message.role == 'user' %}{{ message.content }}<|im_end|>\n{% endif %}{% if message.role == 'assistant' %}{% if message.content and message.content | length > 0 %}{% if tools %}message:\n{% endif %}{{ message.content }}<|im_end|>\n{% endif %}{% if message.tool_calls and message.tool_calls | length > 0 %}{% for tool_call in message.tool_calls %}functions.{{ tool_call.function.name }}:\n{{ tool_call.function.arguments }}{% endfor %}<|im_end|>\n{% endif %}{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"model_max_length": 2048,
"pad_token": "</s>",
"padding_side": "right",
"sp_model_kwargs": {},
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}