Policy Training Settings
Training Style:
Supervised Fine-tuning (SFT)
GRPO (Reinforcement Learning)
Select the training style to use
{#
  render_fields(fields, prefix="", parent_key="")

  Recursively walks a nested mapping of settings and renders each entry.

  Parameters:
    fields     - mapping iterated with .items(); each entry is treated either
                 as a leaf (it exposes a `value` member) or as a nested group.
    prefix     - dotted path of the enclosing groups; empty at the top level.
    parent_key - key of the enclosing group. It is passed along on recursion
                 but is not read anywhere in the visible body.

  For every entry, full_key is built as prefix + '.' + key (the dot is
  omitted when prefix is empty).

  Leaf entries (`value.value` is defined):
    - the key is printed as a label: underscores become spaces, the text is
      capitalized, and a colon follows;
    - the body is then chosen from `metadata.choices`, an `input_type` of
      "checkbox", or a fallback branch;
    - `metadata.help` is printed when present.

  Group entries (everything else):
    - the key is printed as a heading using the same humanizing rule;
    - the macro calls itself on the group with prefix=full_key.

  `training_style` is not a macro parameter; it is read from the surrounding
  template context.

  NOTE(review): `value.metadata.choices is defined` and
  `value.metadata.help is defined` both dereference `value.metadata` first.
  With Jinja's default Undefined type this raises instead of evaluating to
  false when a leaf has no `metadata` member at all - confirm that every leaf
  supplied by the caller carries `metadata`.
#}{% macro render_fields(fields, prefix="", parent_key="") %} {% for key, value in fields.items() %} {% set full_key = prefix ~ ('.' if prefix else '') ~ key %} {% if value.value is defined %}
{{ key.replace('_', ' ').capitalize() }}:
{# Leaf that declares a list of choices: emit one entry per choice. #}{% if value.metadata.choices is defined %}
{% for choice in value.metadata.choices %}
{{ choice }}
{% endfor %}
{# Checkbox and fallback branches emit no text in this view. #}{% elif value.input_type == "checkbox" %}
{% else %}
{% endif %} {% if value.metadata.help is defined %}
{{ value.metadata.help }}
{% endif %}
{# No `value` member: treat the entry as a nested group. #}{% else %}
{{ key.replace('_', ' ').capitalize() }}
{# Only the group keyed 'train_policy' shows the active training style, upper-cased. #}{% if key == 'train_policy' %}
Current training style: {{ training_style.upper() }}
{% endif %} {{ render_fields(value, prefix=full_key, parent_key=key) }}
{% endif %} {% endfor %} {% endmacro %} {# Top-level call: render the whole `fields` mapping with an empty prefix. #}{{ render_fields(fields) }}
Next