Build a Hybrid-Memory Autonomous Agent with Modular Architecture and Tool Dispatch Using OpenAI
Editors Pick
Agentic AI
Software Engineering
Staff
Tutorials
In this tutorial, we begin by exploring the architecture behind a hybrid-memory autonomous agent. This system combines semantic vector search, keyword-based retrieval, and a modular tool-dispatching loop to create an agent capable of reasoning, remembering, and acting autonomously. We walk through each layer of the design from the ground up, starting with abstract interfaces that enforce clean separation of concerns, all the way to a live agent that manages its own long-term memory.
Copy Code
Copied
Use a different Browser
# --- Environment setup -------------------------------------------------------
# Installs dependencies and configures the OpenAI client that every later
# snippet depends on. NOTE: "!pip" is IPython/Jupyter shell magic; run the
# install command manually when executing this as a plain script.
!pip install openai numpy rank_bm25 --quiet
import os, json, math, re, time, getpass
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple
import numpy as np
from rank_bm25 import BM25Okapi
from openai import OpenAI
# Prefer the environment variable; otherwise prompt with getpass so the
# secret is never echoed to the terminal or notebook output.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or getpass.getpass("🔑 Enter your OpenAI API key (hidden): ")
client = OpenAI(api_key=OPENAI_API_KEY)
# Global model constants shared by every subsequent snippet.
EMBED_MODEL = "text-embedding-3-small"
CHAT_MODEL = "gpt-4o-mini"
print("✅ OpenAI client ready.")
We kick things off by installing all required dependencies and configuring our Python environment with the necessary imports. We securely collect the OpenAI API key using getpass, ensuring the key is never echoed to the terminal or notebook output. We also define the two global constants, the embedding model and the chat model, that every subsequent snippet depends on.
Copy Code
Copied
Use a different Browser
class MemoryBackend(ABC):
    """Interface contract for a long-term memory store.

    Concrete implementations must be able to store a text chunk with
    metadata, search by free-text query, and enumerate everything stored.
    """

    @abstractmethod
    def store(self, text: str, metadata: Dict[str, Any]) -> str: ...

    @abstractmethod
    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]: ...

    @abstractmethod
    def list_all(self) -> List[Dict[str, Any]]: ...
class LLMProvider(ABC):
    """Interface contract for a chat-model backend.

    ``complete`` takes a list of message dicts (and optional tool schemas)
    and returns a provider-agnostic response dict.
    """

    @abstractmethod
    def complete(self, messages: List[Dict], tools: Optional[List] = None) -> Dict: ...
class Tool(ABC):
    """Interface for an agent-callable tool.

    Subclasses set the ``name`` and ``description`` class attributes,
    implement ``run``, and may override ``schema`` to declare parameters.
    """

    # Identifier the model uses to address this tool in a tool call.
    name: str
    # One-line summary the model reads to decide when to invoke the tool.
    description: str

    @abstractmethod
    def run(self, **kwargs) -> str: ...

    def schema(self) -> Dict:
        """Return the OpenAI function-calling schema.

        Default declares no parameters; parameterised tools override this.
        """
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {"type": "object", "properties": {}, "required": []},
            },
        }
@dataclass
class MemoryChunk:
    """A single stored unit of long-term memory."""
    id: str  # backend-assigned identifier, e.g. "mem_0001"
    text: str  # the raw remembered text
    metadata: Dict[str, Any]  # arbitrary tags, e.g. {"category": "fact"}
    # Unit-norm embedding vector; excluded from repr to keep logs readable.
    embedding: Optional[np.ndarray] = field(default=None, repr=False)
def _embed(texts: List[str]) -> List[np.ndarray]:
    """Embed *texts* with the configured model; return unit-norm float32 vectors."""
    response = client.embeddings.create(model=EMBED_MODEL, input=texts)
    unit_vectors: List[np.ndarray] = []
    for item in response.data:
        raw = np.array(item.embedding, dtype=np.float32)
        # Normalise so cosine similarity reduces to a plain dot product;
        # the epsilon guards against a zero-length vector.
        unit_vectors.append(raw / (np.linalg.norm(raw) + 1e-10))
    return unit_vectors
def _tokenise(text: str) -> List[str]:
return re.sub(r"[^a-z0-9\s]", "", text.lower()).split()
class HybridMemory(MemoryBackend):
    """In-memory hybrid store: dense vector search plus BM25 keyword search.

    Each stored chunk is embedded (for cosine similarity) and tokenised into
    a BM25 corpus (for keyword match). ``search`` merges both orderings with
    Reciprocal Rank Fusion (RRF).
    """

    # RRF smoothing constant; 60 is the value recommended in the original
    # RRF paper (Cormack et al., 2009).
    RRF_K = 60

    def __init__(self):
        self._chunks: List[MemoryChunk] = []
        self._bm25: Optional[BM25Okapi] = None  # rebuilt on every store
        self._counter = 0  # monotonically increasing id source

    def store(self, text: str, metadata: Dict[str, Any] | None = None) -> str:
        """Embed and index *text*; return the new chunk id (e.g. "mem_0001")."""
        metadata = metadata or {}
        self._counter += 1
        chunk_id = f"mem_{self._counter:04d}"
        [vec] = _embed([text])
        chunk = MemoryChunk(id=chunk_id, text=text, metadata=metadata, embedding=vec)
        self._chunks.append(chunk)
        # Rebuild the BM25 index over the whole corpus; O(total tokens) per
        # store, which is fine at tutorial scale.
        corpus = [_tokenise(c.text) for c in self._chunks]
        self._bm25 = BM25Okapi(corpus)
        print(f" 💾 Stored [{chunk_id}]: {text[:60]}…" if len(text) > 60 else f" 💾 Stored [{chunk_id}]: {text}")
        return chunk_id

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Return up to *top_k* chunks ranked by RRF of cosine and BM25 ranks.

        Each result dict carries the chunk plus its rrf/cosine/bm25 scores.
        """
        if not self._chunks:
            return []
        n = len(self._chunks)
        top_k = min(top_k, n)
        [q_vec] = _embed([query])
        # Embeddings are unit-norm, so the dot product IS the cosine score.
        cos_scores = np.array([np.dot(q_vec, c.embedding) for c in self._chunks])
        vec_ranks = {self._chunks[i].id: rank + 1 for rank, i in enumerate(np.argsort(-cos_scores))}
        bm25_scores = self._bm25.get_scores(_tokenise(query))
        kw_ranks = {self._chunks[i].id: rank + 1 for rank, i in enumerate(np.argsort(-bm25_scores))}
        # Reciprocal Rank Fusion: sum of 1/(K + rank) over both rankings.
        rrf: Dict[str, float] = {}
        for chunk in self._chunks:
            cid = chunk.id
            rrf[cid] = (1.0 / (self.RRF_K + vec_ranks.get(cid, n + 1)) +
                        1.0 / (self.RRF_K + kw_ranks.get(cid, n + 1)))
        ranked_ids = sorted(rrf, key=lambda x: rrf[x], reverse=True)[:top_k]
        # FIX: build one O(n) id->index map instead of the original's per-result
        # linear scans (a next(...) search plus two list.index() calls per hit),
        # which made result assembly accidentally quadratic.
        index_of = {c.id: i for i, c in enumerate(self._chunks)}
        results = []
        for cid in ranked_ids:
            i = index_of[cid]
            chunk = self._chunks[i]
            results.append({
                "id": chunk.id,
                "text": chunk.text,
                "metadata": chunk.metadata,
                "rrf_score": round(rrf[cid], 6),
                "cosine": round(float(cos_scores[i]), 4),
                "bm25": round(float(bm25_scores[i]), 4),
            })
        return results

    def list_all(self) -> List[Dict[str, Any]]:
        """Return every stored chunk as a plain dict (embeddings omitted)."""
        return [{"id": c.id, "text": c.text, "metadata": c.metadata} for c in self._chunks]
class OpenAIProvider(LLMProvider):
    """Concrete LLMProvider backed by the OpenAI chat-completions API.

    Normalises the SDK response into a plain dict so the agent loop never
    touches provider-specific objects.
    """

    def __init__(self, model: str = CHAT_MODEL, temperature: float = 0.2):
        self.model = model
        self.temperature = temperature

    def complete(self, messages: List[Dict], tools: Optional[List] = None) -> Dict:
        """Run one chat completion; return {"role", "content"[, "tool_calls"]}."""
        request: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
        }
        if tools:
            request["tools"] = tools
            request["tool_choice"] = "auto"
        message = client.chat.completions.create(**request).choices[0].message
        normalised: Dict[str, Any] = {"role": "assistant", "content": message.content or ""}
        if message.tool_calls:
            calls = []
            for tc in message.tool_calls:
                calls.append({
                    "id": tc.id,
                    "type": "function",
                    "function": {"name": tc.function.name, "arguments": tc.function.arguments},
                })
            normalised["tool_calls"] = calls
        return normalised
# Confirmation that the core-infrastructure snippet executed end-to-end.
print("✅ Interfaces, HybridMemory, and OpenAIProvider ready.")
We define the three core abstract base classes, MemoryBackend, LLMProvider, and Tool, that serve as the interface contracts every concrete component must honour. We then implement HybridMemory, which stores embeddings for vector search and maintains a live BM25 index for keyword matching, merging both result sets using Reciprocal Rank Fusion. We close the snippet with OpenAIProvider, a concrete LLMProvider that normalises the OpenAI response into a provider-agnostic dictionary the agent can consume without knowing which model sits underneath.
Copy Code
Copied
Use a different Browser
class MemoryStoreTool(Tool):
    """Tool that persists a fact into the agent's long-term memory."""
    name = "memory_store"
    description = "Save an important fact or piece of information to long-term memory."

    def __init__(self, memory: MemoryBackend):
        self._mem = memory  # any MemoryBackend implementation

    def run(self, text: str, category: str = "general") -> str:
        """Store *text* tagged with *category*; return a confirmation string."""
        new_id = self._mem.store(text, {"category": category})
        return f"Stored as {new_id}."

    def schema(self) -> Dict:
        """Declare the function-calling schema: required text, optional category."""
        parameters = {
            "type": "object",
            "properties": {
                "text": {"type": "string", "description": "The fact to remember."},
                "category": {"type": "string", "description": "Category tag, e.g. 'user_pref', 'task', 'fact'."},
            },
            "required": ["text"],
        }
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": parameters,
            },
        }
class MemorySearchTool(Tool):
    """Tool that queries long-term memory and formats the hits for the model."""
    name = "memory_search"
    description = "Search long-term memory for information relevant to a query."

    def __init__(self, memory: MemoryBackend):
        self._mem = memory  # any MemoryBackend implementation

    def run(self, query: str, top_k: int = 3) -> str:
        """Search memory; return a newline-joined summary or a fallback message."""
        hits = self._mem.search(query, top_k=top_k)
        if not hits:
            return "No relevant memories found."
        body = "\n".join(
            f"[{hit['id']}] (score={hit['rrf_score']}) {hit['text']}" for hit in hits
        )
        return "Relevant memories:\n" + body

    def schema(self) -> Dict:
        """Declare the function-calling schema: required query, optional top_k."""
        parameters = {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "What to look for."},
                "top_k": {"type": "integer", "description": "Max results (default 3)."},
            },
            "required": ["query"],
        }
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": parameters,
            },
        }
class CalculatorTool(Tool):
    """Tool that evaluates a restricted math expression supplied by the model."""
    name = "calculator"
    description = "Evaluate a safe mathematical expression, e.g. '2 ** 10 + sqrt(144)'."

    def run(self, expression: str) -> str:
        """Evaluate *expression* with only math functions in scope.

        Returns the result as a string, or an "Error: ..." string on failure.
        """
        # SECURITY: eval() on model-generated text. Emptying __builtins__ is NOT
        # a full sandbox -- dunder attribute access (e.g. "().__class__") can
        # still reach arbitrary objects -- so reject underscores up front. No
        # name this tool exposes (math functions, abs, round) contains one.
        if "_" in expression:
            return "Error: underscores are not allowed in expressions."
        allowed = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
        allowed.update({"abs": abs, "round": round})
        try:
            result = eval(expression, {"__builtins__": {}}, allowed)
            return str(result)
        except Exception as exc:
            return f"Error: {exc}"

    def schema(self) -> Dict:
        """Declare the function-calling schema: a single required expression."""
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        "expression": {"type": "string", "description": "Math expression to evaluate."},
                    },
                    "required": ["expression"],
                },
            },
        }
class WebSnippetTool(Tool):
name = "web_search"
description = "Search the web for current information on a topic (simulated)
← Back to news