VibeVoice-1.5B / chat_template.jinja
bezzam's picture
bezzam HF Staff
Upload processor
1467696 verified
{#-
  Chat template that renders a text-to-speech prompt from `messages`.

  Output layout, in order:
    1. the system prompt;
    2. an optional " Voice input:" section with one line per distinct role
       that has at least one content item of type 'audio';
    3. a "Text input:" section with one "Speaker <role>: <text>" line per
       content item of type 'text', in message order;
    4. "Speech output:" followed by a newline and the speech start token.

  Inputs read by this template:
    messages               - iterable of mappings with 'role' and 'content';
                             'content' is iterated as items carrying a 'type'
                             attribute ('audio' / 'text') and, for text items,
                             a 'text' attribute.
    system_prompt          - optional override of the default instruction.
    speech_start_token,
    speech_end_token,
    speech_diffusion_token - optional overrides of the placeholder tokens.

  NOTE(review): the line layout appears to rely on the environment being
  created with trim_blocks enabled (the newline after a block tag is dropped)
  - confirm against the code that loads this template.
#}
{%- set system_prompt = system_prompt | default(" Transform the text provided by various speakers into speech output, utilizing the distinct voice of each respective speaker.
") -%}
{{ system_prompt -}}
{#- Placeholder tokens; each one falls back to the default below when the caller does not pass it. #}
{%- set speech_start_token = speech_start_token | default("<|vision_start|>") %}
{%- set speech_end_token = speech_end_token | default("<|vision_end|>") %}
{%- set speech_diffusion_token = speech_diffusion_token | default("<|vision_pad|>") %}
{#-
  Collect every role that supplies audio, in first-seen order.
  A namespace is used because plain `set` inside a loop does not outlive the loop.
  Roles are stored in a list so that membership is an exact match: with a
  comma-joined string, "1" counted as already present once "10" was stored,
  and a role containing a comma was split into several speakers.
  The list is rebuilt with `+` instead of mutated in place.
#}
{%- set ns = namespace(speakers_with_audio=[]) %}
{%- for message in messages %}
{%- set role = message['role'] %}
{%- set content = message['content'] %}
{%- set has_audio = content | selectattr('type', 'equalto', 'audio') | list | length > 0 %}
{%- if has_audio and role != "" and role not in ns.speakers_with_audio %}
{%- set ns.speakers_with_audio = ns.speakers_with_audio + [role] %}
{%- endif %}
{%- endfor %}
{#- Voice section: emitted only when at least one role has audio; one placeholder line per such role. #}
{%- if ns.speakers_with_audio %}
{{ " Voice input:
" }}
{%- for speaker in ns.speakers_with_audio %}
Speaker {{ speaker }}:{{ speech_start_token }}{{ speech_diffusion_token }}{{ speech_end_token }}{{ "
" }}
{%- endfor %}
{%- endif %}
{#- Text section: always emitted; one line per text item of every message. #}
Text input:{{ "
" }}
{%- for message in messages %}
{%- set role = message['role'] %}
{%- set text_items = message['content'] | selectattr('type', 'equalto', 'text') | list %}
{%- for item in text_items %}
Speaker {{ role }}: {{ item['text'] }}{{ "
" }}
{%- endfor %}
{%- endfor %}
{#- Generation prompt: the rendered text ends with the speech start token. #}
Speech output:{{ "
" }}{{ speech_start_token }}