Configure whether the model uses thinking mode.
with_thinking_mode(
    self,
    enabled: bool = True,
    **kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]

Example:
from langchain_nvidia_ai_endpoints import ChatNVIDIA

model = ChatNVIDIA(model="nvidia/nvidia-nemotron-nano-9b-v2")

# Enable thinking mode
thinking_model = model.with_thinking_mode(enabled=True)
response = thinking_model.invoke("Hello")

# Disable thinking mode
no_thinking_model = model.with_thinking_mode(enabled=False)
response = no_thinking_model.invoke("Hello")
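Because with_thinking_mode returns a standard LangChain Runnable, the wrapped model composes with the rest of the Runnable interface. A minimal sketch (the streaming call below uses the generic Runnable.stream() API, not anything specific to this method):

from langchain_nvidia_ai_endpoints import ChatNVIDIA

model = ChatNVIDIA(model="nvidia/nvidia-nemotron-nano-9b-v2")
thinking_model = model.with_thinking_mode(enabled=True)

# Stream the reply incrementally; stream() is part of the standard
# Runnable interface, so it is available on the wrapped model as well.
for chunk in thinking_model.stream("Briefly explain what thinking mode does."):
    print(chunk.content, end="", flush=True)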