fix(cli): prepend queued notes safely to multimodal messages
Sending an image to a vision model turns the user message into a list of
OpenAI-style content parts. When a /model or /reload-skills note was queued
for the same turn, the CLI did `note + "\n\n" + agent_message`, crashing the
agent thread with:
TypeError: can only concatenate str (not "list") to str
Repro: `/model gpt-5.5 --provider openai-codex`, then paste+send an image.
Add _prepend_note_to_message(), which folds the note into the first text
part of a content-parts list (or inserts a leading text part for image-only
messages) and keeps the plain-string path unchanged. Used for both the
model-switch and skills-reload notes.
This commit is contained in:
43
cli.py
43
cli.py
@ -2116,6 +2116,39 @@ def _cprint(text: str):
|
||||
pass
|
||||
|
||||
|
||||
def _prepend_note_to_message(message, note: str):
|
||||
"""Prepend a one-shot system-style note to a user message.
|
||||
|
||||
``message`` is normally a plain string, but when the user attaches an image
|
||||
to a vision-capable model it becomes a list of OpenAI-style content parts
|
||||
(text + ``image_url`` blocks). Naively doing ``note + "\\n\\n" + message``
|
||||
then raises ``TypeError: can only concatenate str (not "list") to str`` —
|
||||
e.g. running ``/model ...`` (which queues a model-switch note) and then
|
||||
sending a pasted image in the same turn.
|
||||
|
||||
Returns the message with ``note`` prepended:
|
||||
* ``str`` → ``f"{note}\\n\\n{message}"``
|
||||
* ``list`` → note folded into the first text part, or inserted as a new
|
||||
leading ``{"type": "text"}`` part when there is no text part.
|
||||
Unknown shapes are returned unchanged (fail-open).
|
||||
"""
|
||||
if not note:
|
||||
return message
|
||||
if isinstance(message, str):
|
||||
return f"{note}\n\n{message}"
|
||||
if isinstance(message, list):
|
||||
parts = list(message)
|
||||
for i, part in enumerate(parts):
|
||||
if isinstance(part, dict) and part.get("type") == "text":
|
||||
merged = dict(part)
|
||||
merged["text"] = f"{note}\n\n{part.get('text', '')}"
|
||||
parts[i] = merged
|
||||
return parts
|
||||
# No text part (image-only) — insert the note as a leading text block.
|
||||
return [{"type": "text", "text": note}, *parts]
|
||||
return message
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# File-drop / local attachment detection — extracted as pure helpers for tests.
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -12135,17 +12168,21 @@ class HermesCLI:
|
||||
reset_current_session_key = None # type: ignore[assignment]
|
||||
_approval_session_token = None
|
||||
agent_message = _voice_prefix + message if _voice_prefix else message
|
||||
# Prepend pending model switch note so the model knows about the switch
|
||||
# Prepend pending notes via _prepend_note_to_message, which
|
||||
# handles both plain-string and multimodal content-parts list
|
||||
# messages. Naive ``note + "\n\n" + agent_message`` crashed with
|
||||
# TypeError when an image was attached (agent_message is a list)
|
||||
# and a /model or /reload-skills note was queued for the turn.
|
||||
_msn = getattr(self, '_pending_model_switch_note', None)
|
||||
if _msn:
|
||||
agent_message = _msn + "\n\n" + agent_message
|
||||
agent_message = _prepend_note_to_message(agent_message, _msn)
|
||||
self._pending_model_switch_note = None
|
||||
# Prepend pending /reload-skills note so the model sees which
|
||||
# skills were added/removed before handling this turn. Same
|
||||
# one-shot queue pattern as the model-switch note above.
|
||||
_srn = getattr(self, '_pending_skills_reload_note', None)
|
||||
if _srn:
|
||||
agent_message = _srn + "\n\n" + agent_message
|
||||
agent_message = _prepend_note_to_message(agent_message, _srn)
|
||||
self._pending_skills_reload_note = None
|
||||
try:
|
||||
result = self.agent.run_conversation(
|
||||
|
||||
Reference in New Issue
Block a user