fix(cli): prepend queued notes safely to multimodal messages

Sending an image to a vision model turns the user message into a list of
OpenAI-style content parts. When a /model or /reload-skills note was queued
for the same turn, the CLI did `note + "\n\n" + agent_message`, crashing the
agent thread with:

    TypeError: can only concatenate str (not "list") to str

Repro: `/model gpt-5.5 --provider openai-codex`, then paste+send an image.

Add _prepend_note_to_message(), which folds the note into the first text
part of a content-parts list (or inserts a leading text part for image-only
messages) and keeps the plain-string path unchanged. Used for both the
model-switch and skills-reload notes.
This commit is contained in:
xxxigm
2026-06-02 07:02:51 +07:00
committed by Teknium
parent 21f55af769
commit 043350dfd3

43
cli.py
View File

@ -2116,6 +2116,39 @@ def _cprint(text: str):
pass
def _prepend_note_to_message(message, note: str):
"""Prepend a one-shot system-style note to a user message.
``message`` is normally a plain string, but when the user attaches an image
to a vision-capable model it becomes a list of OpenAI-style content parts
(text + ``image_url`` blocks). Naively doing ``note + "\\n\\n" + message``
then raises ``TypeError: can only concatenate str (not "list") to str`` —
e.g. running ``/model ...`` (which queues a model-switch note) and then
sending a pasted image in the same turn.
Returns the message with ``note`` prepended:
* ``str`` → ``f"{note}\\n\\n{message}"``
* ``list`` → note folded into the first text part, or inserted as a new
leading ``{"type": "text"}`` part when there is no text part.
Unknown shapes are returned unchanged (fail-open).
"""
if not note:
return message
if isinstance(message, str):
return f"{note}\n\n{message}"
if isinstance(message, list):
parts = list(message)
for i, part in enumerate(parts):
if isinstance(part, dict) and part.get("type") == "text":
merged = dict(part)
merged["text"] = f"{note}\n\n{part.get('text', '')}"
parts[i] = merged
return parts
# No text part (image-only) — insert the note as a leading text block.
return [{"type": "text", "text": note}, *parts]
return message
# ---------------------------------------------------------------------------
# File-drop / local attachment detection — extracted as pure helpers for tests.
# ---------------------------------------------------------------------------
@ -12135,17 +12168,21 @@ class HermesCLI:
reset_current_session_key = None # type: ignore[assignment]
_approval_session_token = None
agent_message = _voice_prefix + message if _voice_prefix else message
# Prepend pending model switch note so the model knows about the switch
# Prepend pending notes via _prepend_note_to_message, which
# handles both plain-string and multimodal content-parts list
# messages. Naive ``note + "\n\n" + agent_message`` crashed with
# TypeError when an image was attached (agent_message is a list)
# and a /model or /reload-skills note was queued for the turn.
_msn = getattr(self, '_pending_model_switch_note', None)
if _msn:
agent_message = _msn + "\n\n" + agent_message
agent_message = _prepend_note_to_message(agent_message, _msn)
self._pending_model_switch_note = None
# Prepend pending /reload-skills note so the model sees which
# skills were added/removed before handling this turn. Same
# one-shot queue pattern as the model-switch note above.
_srn = getattr(self, '_pending_skills_reload_note', None)
if _srn:
agent_message = _srn + "\n\n" + agent_message
agent_message = _prepend_note_to_message(agent_message, _srn)
self._pending_skills_reload_note = None
try:
result = self.agent.run_conversation(