fix(kimi): omit temperature entirely for Kimi/Moonshot models (#13157)
Kimi's gateway selects the correct temperature server-side based on the active mode (thinking -> 1.0, non-thinking -> 0.6). Sending any temperature value — even the previously "correct" one — conflicts with gateway-managed defaults.

Replaces the old approach of forcing specific temperature values (0.6 for non-thinking, 1.0 for thinking) with an OMIT_TEMPERATURE sentinel that tells all call sites to strip the temperature key from API kwargs entirely.

Changes:
- agent/auxiliary_client.py: OMIT_TEMPERATURE sentinel, _is_kimi_model() prefix check (covers all kimi-* models), _fixed_temperature_for_model() returns the sentinel for Kimi models, and _build_call_kwargs() strips temperature.
- run_agent.py: _build_api_kwargs, flush_memories, and summary generation paths all handle the sentinel by popping/omitting temperature.
- trajectory_compressor.py: _effective_temperature_for_model returns None for Kimi (the sentinel is mapped to None); direct client calls use a kwargs dict to conditionally include temperature.
- mini_swe_runner.py: same sentinel handling via a wrapper function.
- 6 test files updated: all 'forces temperature X' assertions replaced with 'temperature not in kwargs' assertions.

Net: -76 lines (171 added, 247 removed).

Inspired by PR #13137 (@kshitijk4poor).
This commit is contained in:
@ -58,14 +58,20 @@ def _effective_temperature_for_model(
|
||||
model: str,
|
||||
requested_temperature: float,
|
||||
base_url: Optional[str] = None,
|
||||
) -> float:
|
||||
"""Apply fixed model temperature contracts to direct client calls."""
|
||||
) -> Optional[float]:
|
||||
"""Apply fixed model temperature contracts to direct client calls.
|
||||
|
||||
Returns ``None`` when the model manages temperature server-side (Kimi);
|
||||
callers must omit the ``temperature`` kwarg entirely in that case.
|
||||
"""
|
||||
try:
|
||||
from agent.auxiliary_client import _fixed_temperature_for_model
|
||||
from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
|
||||
except Exception:
|
||||
return requested_temperature
|
||||
|
||||
fixed_temperature = _fixed_temperature_for_model(model, base_url)
|
||||
if fixed_temperature is OMIT_TEMPERATURE:
|
||||
return None # caller must omit temperature
|
||||
if fixed_temperature is not None:
|
||||
return fixed_temperature
|
||||
return requested_temperature
|
||||
@ -600,12 +606,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
max_tokens=self.config.summary_target_tokens * 2,
|
||||
)
|
||||
else:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.config.summarization_model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=summary_temperature,
|
||||
max_tokens=self.config.summary_target_tokens * 2,
|
||||
)
|
||||
_create_kwargs = {
|
||||
"model": self.config.summarization_model,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"max_tokens": self.config.summary_target_tokens * 2,
|
||||
}
|
||||
if summary_temperature is not None:
|
||||
_create_kwargs["temperature"] = summary_temperature
|
||||
response = self.client.chat.completions.create(**_create_kwargs)
|
||||
|
||||
summary = self._coerce_summary_content(response.choices[0].message.content)
|
||||
return self._ensure_summary_prefix(summary)
|
||||
@ -667,12 +675,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
max_tokens=self.config.summary_target_tokens * 2,
|
||||
)
|
||||
else:
|
||||
response = await self._get_async_client().chat.completions.create(
|
||||
model=self.config.summarization_model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=summary_temperature,
|
||||
max_tokens=self.config.summary_target_tokens * 2,
|
||||
)
|
||||
_create_kwargs = {
|
||||
"model": self.config.summarization_model,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"max_tokens": self.config.summary_target_tokens * 2,
|
||||
}
|
||||
if summary_temperature is not None:
|
||||
_create_kwargs["temperature"] = summary_temperature
|
||||
response = await self._get_async_client().chat.completions.create(**_create_kwargs)
|
||||
|
||||
summary = self._coerce_summary_content(response.choices[0].message.content)
|
||||
return self._ensure_summary_prefix(summary)
|
||||
|
||||
Reference in New Issue
Block a user