{#- Prompt template for multi-speaker speech generation. Renders, in order: the system prompt, an optional "Voice input" section with one placeholder per speaker that supplied audio, the "Text input" transcript, and a "Speech output" header followed by the speech start token. Every tag uses whitespace control so the layout of this file adds no characters to the output; all emitted spacing comes from the explicit string literals below. -#}
{%- set system_prompt = system_prompt | default(" Transform the text provided by various speakers into speech output, utilizing the distinct voice of each respective speaker. ") -%}
{{- system_prompt -}}
{#- Marker tokens; each can be overridden by a render variable of the same name. -#}
{%- set speech_start_token = speech_start_token | default("<|vision_start|>") -%}
{%- set speech_end_token = speech_end_token | default("<|vision_end|>") -%}
{%- set speech_diffusion_token = speech_diffusion_token | default("<|vision_pad|>") -%}
{#- Collect, in first-seen order, each distinct non-empty role that has at least one content item of type "audio". A list is used so membership is tested by equality; a comma-joined string would treat role "1" as already present once "10" was recorded, and would split roles that contain a comma. -#}
{%- set ns = namespace(speakers_with_audio=[]) -%}
{%- for message in messages -%}
    {%- set role = message['role'] -%}
    {%- set content = message['content'] -%}
    {%- set has_audio = content | selectattr('type', 'equalto', 'audio') | list | length > 0 -%}
    {%- if has_audio and role and role not in ns.speakers_with_audio -%}
        {%- set ns.speakers_with_audio = ns.speakers_with_audio + [role] -%}
    {%- endif -%}
{%- endfor -%}
{#- Voice section: one start/diffusion/end placeholder triple per collected speaker. The section is omitted entirely when no speaker supplied audio. -#}
{%- if ns.speakers_with_audio -%}
    {{- " " }}{{ " Voice input: " -}}
    {%- for speaker in ns.speakers_with_audio -%}
        {{- " Speaker " }}{{ speaker }}:{{ speech_start_token }}{{ speech_diffusion_token }}{{ speech_end_token }}{{ " " -}}
    {%- endfor -%}
{%- endif -%}
{#- Transcript: every content item of type "text", in message order, labelled with the role of its message. -#}
{{- " Text input:" }}{{ " " -}}
{%- for message in messages -%}
    {%- set role = message['role'] -%}
    {%- set text_items = message['content'] | selectattr('type', 'equalto', 'text') | list -%}
    {%- for item in text_items -%}
        {{- " Speaker " }}{{ role }}: {{ item['text'] }}{{ " " -}}
    {%- endfor -%}
{%- endfor -%}
{#- The prompt ends right after the speech start token. -#}
{{- " Speech output:" }}{{ " " }}{{ speech_start_token }}