Skip to content

BrowserUseTool

autogen.tools.experimental.BrowserUseTool #

BrowserUseTool(*, llm_config, browser=None, agent_kwargs=None, browser_config=None)

Bases: Tool

BrowserUseTool is a tool that uses the browser to perform a task.

Use the browser to perform a task.

PARAMETER DESCRIPTION
llm_config

The LLM configuration.

TYPE: dict[str, Any]

browser

The browser to use. If defined, browser_config must be None

TYPE: Optional[Browser] DEFAULT: None

agent_kwargs

Additional keyword arguments to pass to the Agent

TYPE: Optional[dict[str, Any]] DEFAULT: None

browser_config

The browser configuration to use. If defined, browser must be None

TYPE: Optional[dict[str, Any]] DEFAULT: None

Source code in autogen/tools/experimental/browser_use/browser_use.py
def __init__(  # type: ignore[no-any-unimported]
    self,
    *,
    llm_config: dict[str, Any],
    browser: Optional["Browser"] = None,
    agent_kwargs: Optional[dict[str, Any]] = None,
    browser_config: Optional[dict[str, Any]] = None,
):
    """Use the browser to perform a task.

    The dictionaries passed as ``agent_kwargs`` and ``browser_config`` are
    shallow-copied, so the caller's objects are never modified.

    Args:
        llm_config: The LLM configuration.
        browser: The browser to use. If defined, browser_config must be None
        agent_kwargs: Additional keyword arguments to pass to the Agent.
            A ``max_steps`` entry is not forwarded to the Agent constructor;
            it is passed to ``Agent.run`` instead (default 100).
            ``generate_gif`` defaults to False when not supplied.
        browser_config: The browser configuration to use. If defined, browser must be None.
            ``headless`` defaults to True when not supplied.

    Raises:
        ValueError: If both ``browser`` and a non-empty ``browser_config`` are provided.
    """
    # Shallow copies: the code below pops/sets keys and must not leak those
    # changes into dictionaries owned by the caller.
    agent_kwargs = {} if agent_kwargs is None else dict(agent_kwargs)
    browser_config = {} if browser_config is None else dict(browser_config)

    if browser is not None and browser_config:
        raise ValueError(
            f"Cannot provide both browser and additional keyword parameters: {browser=}, {browser_config=}"
        )

    if browser is None:
        # set default value for headless
        headless = browser_config.pop("headless", True)

        browser = Browser(config=BrowserConfig(headless=headless, **browser_config))

    # set default value for generate_gif
    agent_kwargs.setdefault("generate_gif", False)

    async def browser_use(  # type: ignore[no-any-unimported]
        task: Annotated[str, "The task to perform."],
        llm_config: Annotated[dict[str, Any], Depends(on(llm_config))],
        browser: Annotated[Browser, Depends(on(browser))],
        agent_kwargs: Annotated[dict[str, Any], Depends(on(agent_kwargs))],
    ) -> BrowserUseResult:
        llm = LangChainChatModelFactory.create_base_chat_model(llm_config)

        # Copy per invocation: popping "max_steps" from a dict that is shared
        # between calls would silently reset a user-supplied max_steps to the
        # default after the first run.
        run_kwargs = dict(agent_kwargs)
        max_steps = run_kwargs.pop("max_steps", 100)

        agent = Agent(
            task=task,
            llm=llm,
            browser=browser,
            controller=BrowserUseTool._get_controller(llm_config),
            **run_kwargs,
        )

        result = await agent.run(max_steps=max_steps)

        return BrowserUseResult(
            extracted_content=result.extracted_content(),
            final_result=result.final_result(),
        )

    super().__init__(
        name="browser_use",
        description="Use the browser to perform a task.",
        func_or_tool=browser_use,
    )

name property #

name

description property #

description

func property #

func

tool_schema property #

tool_schema

Get the schema for the tool.

This is the preferred way of handling function calls with OpenAI and compatible frameworks.

function_schema property #

function_schema

Get the schema for the function.

This is the old way of handling function calls with OpenAI and compatible frameworks. It is provided for backward compatibility.

realtime_tool_schema property #

realtime_tool_schema

Get the schema for the tool.

This is the preferred way of handling function calls with OpenAI and compatible frameworks.

register_for_llm #

register_for_llm(agent)

Registers the tool for use with a ConversableAgent's language model (LLM).

This method registers the tool so that it can be invoked by the agent during interactions with the language model.

PARAMETER DESCRIPTION
agent

The agent to which the tool will be registered.

TYPE: ConversableAgent

Source code in autogen/tools/tool.py
def register_for_llm(self, agent: "ConversableAgent") -> None:
    """Register this tool with the language model (LLM) of a ConversableAgent.

    Once registered, the tool can be invoked by the agent during its
    interactions with the language model.

    Args:
        agent (ConversableAgent): The agent that should receive this tool.
    """
    # The agent hands back a decorator; applying it to this tool performs
    # the actual registration.
    llm_decorator = agent.register_for_llm()
    llm_decorator(self)

register_for_execution #

register_for_execution(agent)

Registers the tool for direct execution by a ConversableAgent.

This method registers the tool so that it can be executed by the agent, typically outside of the context of an LLM interaction.

PARAMETER DESCRIPTION
agent

The agent to which the tool will be registered.

TYPE: ConversableAgent

Source code in autogen/tools/tool.py
def register_for_execution(self, agent: "ConversableAgent") -> None:
    """Register this tool so a ConversableAgent can execute it directly.

    Once registered, the agent is able to run the tool, typically outside
    of the context of an LLM interaction.

    Args:
        agent (ConversableAgent): The agent that should receive this tool.
    """
    # The agent hands back a decorator; applying it to this tool performs
    # the actual registration.
    execution_decorator = agent.register_for_execution()
    execution_decorator(self)

register_tool #

register_tool(agent)

Register a tool to be both proposed and executed by an agent.

Equivalent to calling both register_for_llm and register_for_execution with the same agent.

Note: This will not make the agent recommend and execute the call in one step. If the agent recommends the tool, it will need to be the next agent to speak in order to execute the tool.

PARAMETER DESCRIPTION
agent

The agent to which the tool will be registered.

TYPE: ConversableAgent

Source code in autogen/tools/tool.py
def register_tool(self, agent: "ConversableAgent") -> None:
    """Register this tool for both proposal and execution by a single agent.

    This is shorthand for calling `register_for_llm` followed by
    `register_for_execution` on the same agent.

    Note: Registering both roles does not let the agent recommend and execute the
    call in a single step. When the agent recommends the tool, it must also be the
    next agent to speak for the tool to be executed.

    Args:
        agent (ConversableAgent): The agent that should receive this tool.
    """
    # First expose the tool to the agent's LLM ...
    self.register_for_llm(agent)
    # ... then allow the same agent to execute it.
    self.register_for_execution(agent)