BrowserUseTool

autogen.tools.experimental.BrowserUseTool #

BrowserUseTool(*, llm_config, browser=None, agent_kwargs=None, browser_config=None)

Bases: Tool

BrowserUseTool is a tool that uses the browser to perform a task.

Use the browser to perform a task.

PARAMETER	DESCRIPTION
`llm_config`	The LLM configuration. TYPE: `dict[str, Any]`
`browser`	The browser to use. If defined, browser_config must be None. TYPE: `Optional[Browser]` DEFAULT: `None`
`agent_kwargs`	Additional keyword arguments to pass to the Agent. TYPE: `Optional[dict[str, Any]]` DEFAULT: `None`
`browser_config`	The browser configuration to use. If defined, browser must be None. TYPE: `Optional[dict[str, Any]]` DEFAULT: `None`

Source code in autogen/tools/experimental/browser_use/browser_use.py

def __init__(  # type: ignore[no-any-unimported]
    self,
    *,
    llm_config: dict[str, Any],
    browser: Optional["Browser"] = None,
    agent_kwargs: Optional[dict[str, Any]] = None,
    browser_config: Optional[dict[str, Any]] = None,
):
    """Use the browser to perform a task.

    Args:
        llm_config: The LLM configuration.
        browser: The browser to use. If defined, browser_config must be None (or empty).
        agent_kwargs: Additional keyword arguments to pass to the Agent. A ``max_steps``
            entry (default 100) is forwarded to ``agent.run`` rather than to the Agent
            constructor, and ``generate_gif`` defaults to False when not given.
        browser_config: The browser configuration to use. If defined, browser must be None.
            A ``headless`` entry defaults to True when not given.

    Raises:
        ValueError: If both ``browser`` and a non-empty ``browser_config`` are provided.
    """
    # Work on shallow copies: defaults are filled in and keys are popped below,
    # and none of that should leak back into the dictionaries owned by the caller.
    agent_kwargs = {} if agent_kwargs is None else dict(agent_kwargs)
    browser_config = {} if browser_config is None else dict(browser_config)

    if browser is not None and browser_config:
        raise ValueError(
            f"Cannot provide both browser and additional keyword parameters: {browser=}, {browser_config=}"
        )

    if browser is None:
        # set default value for headless
        headless = browser_config.pop("headless", True)
        browser = Browser(config=BrowserConfig(headless=headless, **browser_config))

    # set default value for generate_gif
    agent_kwargs.setdefault("generate_gif", False)

    async def browser_use(  # type: ignore[no-any-unimported]
        task: Annotated[str, "The task to perform."],
        llm_config: Annotated[dict[str, Any], Depends(on(llm_config))],
        browser: Annotated[Browser, Depends(on(browser))],
        agent_kwargs: Annotated[dict[str, Any], Depends(on(agent_kwargs))],
    ) -> BrowserUseResult:
        llm = LangChainChatModelFactory.create_base_chat_model(llm_config)

        # The injected dict is the same object on every invocation. Pop from a
        # per-call copy so a configured ``max_steps`` is still honoured on the
        # second and later calls instead of silently reverting to the default.
        run_kwargs = dict(agent_kwargs)
        max_steps = run_kwargs.pop("max_steps", 100)

        agent = Agent(
            task=task,
            llm=llm,
            browser=browser,
            controller=BrowserUseTool._get_controller(llm_config),
            **run_kwargs,
        )

        result = await agent.run(max_steps=max_steps)

        return BrowserUseResult(
            extracted_content=result.extracted_content(),
            final_result=result.final_result(),
        )

    super().__init__(
        name="browser_use",
        description="Use the browser to perform a task.",
        func_or_tool=browser_use,
    )

name `property` #

name

description `property` #

description

func `property` #

func

tool_schema `property` #

tool_schema

Get the schema for the tool.

This is the preferred way of handling function calls with OpenAI and compatible frameworks.

function_schema `property` #

function_schema

Get the schema for the function.

This is the old way of handling function calls with OpenAI and compatible frameworks. It is provided for backward compatibility.

realtime_tool_schema `property` #

realtime_tool_schema

Get the schema for the tool.

This is the preferred way of handling function calls with OpenAI and compatible frameworks.

register_for_llm #

register_for_llm(agent)

Registers the tool for use with a ConversableAgent's language model (LLM).

This method registers the tool so that it can be invoked by the agent during interactions with the language model.

PARAMETER	DESCRIPTION
`agent`	The agent to which the tool will be registered. TYPE: `ConversableAgent`

Source code in autogen/tools/tool.py

def register_for_llm(self, agent: "ConversableAgent") -> None:
    """Make this tool available to the language model of a ConversableAgent.

    The agent's ``register_for_llm()`` call yields a registration callable;
    this tool is handed to that callable so the agent can propose the tool
    during LLM interactions.

    Args:
        agent (ConversableAgent): The agent that should be able to propose this tool.
    """
    llm_registrar = agent.register_for_llm()
    llm_registrar(self)

register_for_execution #

register_for_execution(agent)

Registers the tool for direct execution by a ConversableAgent.

This method registers the tool so that it can be executed by the agent, typically outside of the context of an LLM interaction.

PARAMETER	DESCRIPTION
`agent`	The agent to which the tool will be registered. TYPE: `ConversableAgent`

Source code in autogen/tools/tool.py

def register_for_execution(self, agent: "ConversableAgent") -> None:
    """Make this tool executable by a ConversableAgent.

    The agent's ``register_for_execution()`` call yields a registration
    callable; this tool is handed to that callable so the agent can run it,
    typically outside of the context of an LLM interaction.

    Args:
        agent (ConversableAgent): The agent that should be able to execute this tool.
    """
    execution_registrar = agent.register_for_execution()
    execution_registrar(self)

register_tool #

register_tool(agent)

Register a tool to be both proposed and executed by an agent.

Equivalent to calling both register_for_llm and register_for_execution with the same agent.

Note: This will not make the agent recommend and execute the call in one step. If the agent recommends the tool, it must also be the next agent to speak in order to execute the tool.

PARAMETER	DESCRIPTION
`agent`	The agent to which the tool will be registered. TYPE: `ConversableAgent`

Source code in autogen/tools/tool.py

def register_tool(self, agent: "ConversableAgent") -> None:
    """Register this tool with one agent for both proposal and execution.

    Shorthand for calling `register_for_llm` followed by `register_for_execution`
    with the same agent.

    Note: Registering both roles does not let the agent recommend and execute the
    call in a single step. After recommending the tool, the agent must also be the
    next agent to speak in order to execute it.

    Args:
        agent (ConversableAgent): The agent to which the tool will be registered.
    """
    # LLM registration first, then execution registration.
    for register in (self.register_for_llm, self.register_for_execution):
        register(agent)

BrowserUseTool

autogen.tools.experimental.BrowserUseTool #

name property #

description property #

func property #

tool_schema property #

function_schema property #

realtime_tool_schema property #

register_for_llm #

register_for_execution #

register_tool #

name `property` #

description `property` #

func `property` #

tool_schema `property` #

function_schema `property` #

realtime_tool_schema `property` #