This engine stores ingested documents in memory and then injects them into an internal agent's system message for answering queries.
This implements the autogen.agentchat.contrib.rag.RAGQueryEngine protocol.
Source code in autogen/agents/experimental/document_agent/inmemory_query_engine.py
def __init__(
    self,
    llm_config: dict[str, Any],
) -> None:
    """Create the internal query agent and an empty in-memory document store.

    Args:
        llm_config: LLM configuration for the internal query agent. The
            dictionary is deep-copied before being modified, so the caller's
            object is left untouched.
    """
    # Work on a private copy: the response format is overridden below and
    # that change must not leak back into the caller's configuration.
    agent_config = copy.deepcopy(llm_config)
    # Ask the model to reply using the QueryAnswer structured-output schema.
    agent_config["response_format"] = QueryAnswer

    # Agent that answers questions; its system message is rewritten per query.
    self._query_agent = ConversableAgent(name="inmemory_query_agent", llm_config=agent_config)

    # Documents ingested so far (starts empty).
    self._ingested_documents: list[DocumentStore] = []
|
query
query(question, *args, **kwargs)
Run a query against the ingested documents and return the answer.
Source code in autogen/agents/experimental/document_agent/inmemory_query_engine.py
def query(self, question: str, *args: Any, **kwargs: Any) -> str:
    """Answer a question using only the documents ingested so far.

    The content of every ingested document is placed into the query agent's
    system message, the agent is run for a single turn, and its reply is
    parsed as a ``QueryAnswer`` structured output.

    Args:
        question: The question to answer.
        *args: Accepted but not used by this method.
        **kwargs: Accepted but not used by this method.

    Returns:
        The agent's answer when it reports it could answer;
        ``QUERY_NO_INGESTIONS_REPLY`` when nothing has been ingested;
        ``COULD_NOT_ANSWER_REPLY`` (optionally followed by ``": "`` and the
        agent's partial answer) when it could not answer; or
        ``ERROR_RESPONSE_REPLY`` followed by the error text when the reply
        cannot be converted into a ``QueryAnswer``.
    """
    documents = self._ingested_documents

    # Nothing to search: short-circuit with the canned reply.
    if not documents:
        return QUERY_NO_INGESTIONS_REPLY

    # Full text of every document, numbered from 1, separated by blank lines.
    context = "\n".join(
        f"Ingested File/URL {position} - '{document.ingestation_name}':\n{document.content}\n"
        for position, document in enumerate(documents, 1)
    )
    # Bullet list of document names so the model knows what is available.
    document_index = "\n".join(f"- {document.ingestation_name}" for document in documents)

    system_message = (
        "You are a query agent tasked with answering questions based on ingested documents.\n\n"
        "AVAILABLE DOCUMENTS:\n"
        f"{document_index}"
        "\n\n"
        "When answering questions about these documents, use ONLY the information in the following context:\n\n"
        f"{context}\n\n"
        "IMPORTANT: The user will ask about these documents by name. When they do, provide helpful, detailed answers based on the document content above."
    )
    self._query_agent.update_system_message(system_message)

    # Single-turn exchange: one question in, one structured reply out.
    chat_result = self._query_agent.run(
        message=f"Using ONLY the document content in your system message, answer this question: {question}",
        max_turns=1,
    )

    try:
        # The summary is expected to hold the JSON-encoded structured output.
        parsed = QueryAnswer.model_validate(json.loads(chat_result.summary))
        if parsed.could_answer:
            return parsed.answer
        if parsed.answer:
            # The agent could not answer but still supplied some explanation.
            return COULD_NOT_ANSWER_REPLY + ": " + parsed.answer
        return COULD_NOT_ANSWER_REPLY
    except Exception as error:
        # Any failure turning the reply into a QueryAnswer is reported to the
        # caller as text rather than raised.
        return ERROR_RESPONSE_REPLY + str(error)
|
add_docs
add_docs(new_doc_dir=None, new_doc_paths_or_urls=None)
Add additional documents to the in-memory store.
Loads new Docling-parsed Markdown files from a specified directory or a list of file paths and inserts them into the in-memory store.
PARAMETER | DESCRIPTION |
new_doc_dir | The directory path from which to load additional documents. If provided, all eligible files in this directory are loaded. TYPE: Optional[Union[Path, str]] DEFAULT: None |
new_doc_paths_or_urls | A list of file paths or URLs specifying additional documents to load. Each file should be a Docling-parsed Markdown file. TYPE: Optional[Sequence[Union[Path, str]]] DEFAULT: None |
Source code in autogen/agents/experimental/document_agent/inmemory_query_engine.py
def add_docs(
    self,
    new_doc_dir: Optional[Union[Path, str]] = None,
    new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
) -> None:
    """Ingest further documents into the in-memory store.

    Both arguments are forwarded to ``self._load_doc``, which performs the
    actual loading.

    Args:
        new_doc_dir: Directory from which to load additional documents.
            A falsy value (``None`` or ``""``) is forwarded as ``""``.
        new_doc_paths_or_urls: Individual documents to load. A falsy value
            (``None`` or an empty sequence) is forwarded as ``[]``.
    """
    # Normalise "not provided" to the empty values handed on to the loader.
    directory = new_doc_dir if new_doc_dir else ""
    documents = new_doc_paths_or_urls if new_doc_paths_or_urls else []
    self._load_doc(input_dir=directory, input_docs=documents)
|
init_db
init_db(new_doc_dir=None, new_doc_paths_or_urls=None, *args, **kwargs)
Neither required nor implemented for InMemoryQueryEngine.
Source code in autogen/agents/experimental/document_agent/inmemory_query_engine.py
def init_db(
    self,
    new_doc_dir: Optional[Union[Path, str]] = None,
    new_doc_paths_or_urls: Optional[Sequence[Union[Path, str]]] = None,
    *args: Any,
    **kwargs: Any,
) -> bool:
    """Neither required nor implemented for InMemoryQueryEngine.

    All arguments are ignored.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Method, init_db, not required nor implemented for InMemoryQueryEngine"
    raise NotImplementedError(reason)
|
connect_db
connect_db(*args, **kwargs)
Neither required nor implemented for InMemoryQueryEngine.
Source code in autogen/agents/experimental/document_agent/inmemory_query_engine.py
def connect_db(self, *args: Any, **kwargs: Any) -> bool:
    """Neither required nor implemented for InMemoryQueryEngine.

    All arguments are ignored.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Method, connect_db, not required nor implemented for InMemoryQueryEngine"
    raise NotImplementedError(reason)
|