hayuh committed
Commit e38a9d9 · verified · 1 Parent(s): 81fae24

Upload 20 files

.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_2.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_3.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_4.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/Unknown_EDS_1.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/Unknown_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
Ehlers-Danlos-1/2024_EDS_1.pdf ADDED
The diff for this file is too large to render.
 
Ehlers-Danlos-1/2024_EDS_2.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46fc736ff4174473e0a846b7ca8430c140d89cd2c9f663e105bc48b33f8d9c99
+size 2616000
Ehlers-Danlos-1/2024_EDS_3.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fef5c8c375297158ad7ad63166405ca7ce4ac511371a8454fe9df972755b0fe
+size 10344738
Ehlers-Danlos-1/2024_EDS_4.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25db35c77fd6aeba6b15278671a462b30ffbb6f97eb5f221e0459f6d11c0f8ed
+size 1071576
Ehlers-Danlos-1/2024_EDS_5.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57ef98bcb445da6abda66de35204634bd81d8c6dcdf53bfc3be54447ec9ad0ad
+size 2772421
Ehlers-Danlos-1/2024_EDS_6.pdf ADDED
Binary file (146 kB).
 
Ehlers-Danlos-1/2024_EDS_7.pdf ADDED
The diff for this file is too large to render.
 
Ehlers-Danlos-1/Unknown_EDS_1.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbeaf13d3298a00bc1c7acfba3177a0c639f677e0f0941452709fe60542052d4
+size 21553835
Ehlers-Danlos-1/Unknown_EDS_2.pdf ADDED
Binary file (428 kB).
 
Ehlers-Danlos-1/Unknown_EDS_3.pdf ADDED
Binary file (817 kB).
 
Ehlers-Danlos-1/Unknown_EDS_4.pdf ADDED
Binary file (392 kB).
 
Ehlers-Danlos-1/Unknown_EDS_5.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c5a77524b6bb4dca40798af5ff3e3c622216a13ac21a60d9befce255977b47a
+size 1847313
app.py ADDED
@@ -0,0 +1,144 @@
+import os
+import glob
+from pathlib import Path
+import gradio as gr
+import nest_asyncio
+import dill as pickle
+
+# Allow nested asyncio event loops (LlamaIndex runs async code internally)
+nest_asyncio.apply()
+
+# Import the OpenAI key with a helper function
+from helper import get_openai_api_key
+OPENAI_API_KEY = get_openai_api_key()
+
+# Define the path to the directory containing the PDF files
+folder_path = 'Ehlers-Danlos-1'
+
+# Get the list of all PDF files in the directory
+pdf_files = glob.glob(os.path.join(folder_path, '*.pdf'))
+print(pdf_files)
+
+# Extract just the filenames (optional)
+pdf_filenames = [os.path.basename(pdf) for pdf in pdf_files]
+print(pdf_filenames)
+
+# Import utilities
+from utils import get_doc_tools
+
+# Truncate function names if necessary (OpenAI tool/function names are limited to 64 characters)
+def truncate_function_name(name, max_length=64):
+    return name if len(name) <= max_length else name[:max_length]
+
+# Path to save/load serialized tools
+tools_cache_path = 'tools_cache.pkl'
+
+# Initialize paper_to_tools_dict
+paper_to_tools_dict = {}
+
+# Load cached tools if the cache file exists and is not empty
+if os.path.exists(tools_cache_path) and os.path.getsize(tools_cache_path) > 0:
+    try:
+        with open(tools_cache_path, 'rb') as f:
+            paper_to_tools_dict = pickle.load(f)
+    except EOFError:
+        print("Cache file is corrupted. Recreating tools.")
+        paper_to_tools_dict = {}
+else:
+    print("Cache file does not exist or is empty. Recreating tools.")
+
+# Create tools for each PDF if they were not loaded from the cache
+if not paper_to_tools_dict:
+    for pdf in pdf_files:
+        print(f"Getting tools for paper: {pdf}")
+        vector_tool, summary_tool = get_doc_tools(pdf, Path(pdf).stem)
+        paper_to_tools_dict[pdf] = [vector_tool, summary_tool]
+
+    # Save tools to cache
+    with open(tools_cache_path, 'wb') as f:
+        pickle.dump(paper_to_tools_dict, f)
+
+# Combine all tools into a single list
+all_tools = [t for pdf in pdf_files for t in paper_to_tools_dict[pdf]]
+
+# Define an object index and retriever over these tools
+from llama_index.core import VectorStoreIndex
+from llama_index.core.objects import ObjectIndex
+
+obj_index = ObjectIndex.from_objects(
+    all_tools,
+    index_cls=VectorStoreIndex,
+)
+
+obj_retriever = obj_index.as_retriever(similarity_top_k=3)
+
+# Initialize the OpenAI LLM
+from llama_index.llms.openai import OpenAI
+llm = OpenAI(model="gpt-3.5-turbo")
+
+# Set up the agent
+from llama_index.core.agent import FunctionCallingAgentWorker
+from llama_index.core.agent import AgentRunner
+
+agent_worker = FunctionCallingAgentWorker.from_tools(
+    tool_retriever=obj_retriever,
+    llm=llm,
+    verbose=True
+)
+agent = AgentRunner(agent_worker)
+
+# Define the function to query the agent
+def ask_agent(question):
+    response = agent.query(question)
+    return str(response)
+
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=ask_agent,
+    inputs="text",
+    outputs="text",
+    title="EDS Research Agent",
+)
+
+# Launch the Gradio app
+iface.launch(share=True)
+
+# Commented-out earlier experiments, kept for reference:
+"""
+import streamlit as st
+from transformers import pipeline
+
+# Load your model
+generator = pipeline('text-generation', model='gpt-3.5-turbo')
+
+# Streamlit interface
+st.title("Text Generator")
+prompt = st.text_input("Enter your prompt:")
+if st.button("Generate"):
+    result = generator(prompt, max_length=50)
+    st.write(result[0]['generated_text'])
+"""
+
+"""
+import gradio as gr
+from transformers import pipeline
+
+# Load your model
+generator = pipeline('text-generation', model='gpt-3.5-turbo')
+
+# Define the function to generate text
+def generate_text(prompt):
+    result = generator(prompt, max_length=50)
+    return result[0]['generated_text']
+
+# Create the Gradio interface
+iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="Text Generator")
+
+# Launch the interface
+iface.launch()
+"""
+
+"""
+import torch
+print(torch.__version__)
+"""
helper.py ADDED
@@ -0,0 +1,13 @@
+# Add your utilities or helper functions to this file.
+
+import os
+from dotenv import load_dotenv, find_dotenv
+
+# These helpers expect to find a .env file in the project directory (or a parent directory).
+# The format for that file is: API_KEYNAME=AStringThatIsTheLongAPIKeyFromSomeService
+def load_env():
+    _ = load_dotenv(find_dotenv())
+
+def get_openai_api_key():
+    load_env()
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    return openai_api_key
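For the lookup above to succeed, a .env file containing a line of the form OPENAI_API_KEY=<your key> must be discoverable by find_dotenv(). A minimal sketch of how a caller might fail fast when the key is missing (the error message is illustrative, not part of the committed code):

# Hypothetical guard: abort early if the .env file or key is absent.
from helper import get_openai_api_key

api_key = get_openai_api_key()
if not api_key:
    raise RuntimeError("OPENAI_API_KEY not found; add it to a .env file as OPENAI_API_KEY=<your key>")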
rag.ipynb ADDED
@@ -0,0 +1,222 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#import OpenAI key with helper function\n",
+    "from helper import get_openai_api_key\n",
+    "\n",
+    "OPENAI_API_KEY = get_openai_api_key()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#A lot of modules use async and we want them to be compatible with Jupyter notebook\n",
+    "import nest_asyncio\n",
+    "\n",
+    "nest_asyncio.apply()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Ehlers-Danlos-1\\\\2024_EDS_1.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_2.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_3.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_4.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_5.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_6.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_7.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_1.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_2.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_3.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_4.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_5.pdf']\n",
+      "['2024_EDS_1.pdf', '2024_EDS_2.pdf', '2024_EDS_3.pdf', '2024_EDS_4.pdf', '2024_EDS_5.pdf', '2024_EDS_6.pdf', '2024_EDS_7.pdf', 'Unknown_EDS_1.pdf', 'Unknown_EDS_2.pdf', 'Unknown_EDS_3.pdf', 'Unknown_EDS_4.pdf', 'Unknown_EDS_5.pdf']\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import glob\n",
+    "\n",
+    "# Define the path to the directory containing the PDF files\n",
+    "folder_path = 'Ehlers-Danlos-1'\n",
+    "\n",
+    "# Get the list of all PDF files in the directory\n",
+    "pdf_files = glob.glob(os.path.join(folder_path, '*.pdf'))\n",
+    "print(pdf_files)\n",
+    "\n",
+    "# Extract just the filenames (optional)\n",
+    "pdf_filenames = [os.path.basename(pdf) for pdf in pdf_files]\n",
+    "\n",
+    "# Print the list of PDF filenames\n",
+    "print(pdf_filenames)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_1.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_2.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_3.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_4.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_5.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_6.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_7.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_1.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_2.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_3.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_4.pdf\n",
+      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_5.pdf\n"
+     ]
+    }
+   ],
+   "source": [
+    "from utils import get_doc_tools\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Ensure function names are within the allowed length limit\n",
+    "def truncate_function_name(name, max_length=64):\n",
+    "    return name if len(name) <= max_length else name[:max_length]\n",
+    "\n",
+    "paper_to_tools_dict = {}\n",
+    "for pdf in pdf_files:\n",
+    "    print(f\"Getting tools for paper: {pdf}\")\n",
+    "    vector_tool, summary_tool = get_doc_tools(pdf, Path(pdf).stem)\n",
+    "    #vector_tool, summary_tool = get_doc_tools(pdf, truncate_function_name(Path(pdf).stem))\n",
+    "    paper_to_tools_dict[pdf] = [vector_tool, summary_tool]\n",
+    "    #print(vector_tool)\n",
+    "    #print(summary_tool)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_tools = [t for pdf in pdf_files for t in paper_to_tools_dict[pdf]]\n",
+    "#all_tools = [truncate_function_name(tool) for tool in all_tools]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define an \"object\" index and retriever over these tools\n",
+    "from llama_index.core import VectorStoreIndex\n",
+    "from llama_index.core.objects import ObjectIndex\n",
+    "\n",
+    "obj_index = ObjectIndex.from_objects(\n",
+    "    all_tools,\n",
+    "    index_cls=VectorStoreIndex,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "obj_retriever = obj_index.as_retriever(similarity_top_k=3)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.llms.openai import OpenAI\n",
+    "\n",
+    "llm = OpenAI(model=\"gpt-3.5-turbo\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core.agent import FunctionCallingAgentWorker\n",
+    "from llama_index.core.agent import AgentRunner\n",
+    "\n",
+    "agent_worker = FunctionCallingAgentWorker.from_tools(\n",
+    "    tool_retriever=obj_retriever,\n",
+    "    llm=llm,\n",
+    "    verbose=True\n",
+    ")\n",
+    "agent = AgentRunner(agent_worker)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'agent' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43magent\u001b[49m\u001b[38;5;241m.\u001b[39mquery(\n\u001b[0;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDo people with EDS suffer from dislocations, and if so, how do they manifest?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mstr\u001b[39m(response))\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'agent' is not defined"
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
+      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
+      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
+      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "response = agent.query(\n",
+    "    \"Do people with EDS suffer from dislocations, and if so, how do they manifest?\"\n",
+    ")\n",
+    "print(str(response))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
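The NameError and kernel crash in the final cell come from running the query cell in a fresh kernel before the earlier setup cells; re-running the agent-construction cell first avoids it. A small guard cell, purely illustrative and not part of the committed notebook, could make that failure mode explicit:

# Hypothetical guard cell: give a clear message if the setup cells were skipped.
try:
    agent
except NameError:
    raise RuntimeError("Run the setup cells above (which define `agent`) before querying.") from None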
ragas_eval.py ADDED
@@ -0,0 +1,36 @@
+import os
+import sys
+from helper import get_openai_api_key
+
+# Add the local virtual environment's site-packages to sys.path
+# (adjust the 'Lib' / 'python3.12' segments to match your platform's venv layout)
+venv_path = os.path.join(os.path.dirname(__file__), 'venv', 'Lib', 'python3.12', 'site-packages')
+sys.path.append(venv_path)
+
+os.environ["OPENAI_API_KEY"] = get_openai_api_key()
+
+# Load all documents in the EDS paper folder
+from langchain_community.document_loaders import DirectoryLoader
+loader = DirectoryLoader("Ehlers-Danlos-1")
+documents = loader.load()
+
+for document in documents:
+    document.metadata['filename'] = document.metadata['source']
+
+from ragas.testset.generator import TestsetGenerator
+from ragas.testset.evolutions import simple, reasoning, multi_context
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+
+# Generator and critic use OpenAI models
+generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
+critic_llm = ChatOpenAI(model="gpt-4")
+embeddings = OpenAIEmbeddings()
+
+generator = TestsetGenerator.from_langchain(
+    generator_llm,
+    critic_llm,
+    embeddings
+)
+
+# Generate the test set
+testset = generator.generate_with_langchain_docs(
+    documents,
+    test_size=10,
+    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25}
+)
+print("Generated testset:")
+print(testset)
+testset.to_pandas()
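testset.to_pandas() returns a DataFrame that the script currently discards. A natural follow-up, sketched here with an assumed output filename, is to persist it so later RAGAS evaluation runs can reuse the same questions:

# Hypothetical follow-up: save the generated questions/contexts for reuse.
df = testset.to_pandas()
df.to_csv("eds_ragas_testset.csv", index=False)
print(df.head())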
requirements.txt ADDED
@@ -0,0 +1,14 @@
+# Requirements file
+# Note which revision of Python this targets, for example 3.9.6.
+# List all pip-installable dependencies here, pinned to a revision where possible.
+
+python-dotenv==1.0.0
+
+llama-index==0.10.27
+llama-index-llms-openai==0.1.15
+llama-index-embeddings-openai==0.1.7
+
+gradio
+transformers
+torch>=1.8.0
+
+# Also imported by app.py (unpinned)
+nest_asyncio
+dill
test.py ADDED
@@ -0,0 +1,26 @@
+import sys
+import os
+
+# Add the virtual environment's site-packages to sys.path.
+# On Unix the path normally includes a 'pythonX.Y' segment (e.g. venv/lib/python3.12/site-packages);
+# on Windows it is venv\Lib\site-packages.
+venv_path = os.path.join(os.path.dirname(__file__), 'venv', 'lib', 'site-packages')
+sys.path.append(venv_path)
+
+# Sanity-check the import path by printing what Python can see
+print("sys.path:", sys.path)
+print("Contents of venv_path:", os.listdir(venv_path))
+
+# Now import the TestsetGenerator
+try:
+    from ragas.testset.generator import TestsetGenerator
+    print("Successfully imported TestsetGenerator.")
+except ImportError as e:
+    print("ImportError:", e)
+
+# Use the imported class
+try:
+    generator = TestsetGenerator()
+    print("Successfully created a TestsetGenerator instance.")
+except Exception as e:
+    print("Error creating TestsetGenerator instance:", e)
tools_cache.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bd5c05da1d3423251cb117b519c5f46662199b6c15f4db4591e226f70a584d6
+size 8897145
utils.py ADDED
@@ -0,0 +1,82 @@
+from typing import List, Optional
+
+from llama_index.core import SimpleDirectoryReader, SummaryIndex, VectorStoreIndex
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.tools import FunctionTool, QueryEngineTool
+from llama_index.core.vector_stores import MetadataFilters, FilterCondition
+
+
+def get_doc_tools(
+    file_path: str,
+    name: str,
+):
+    """Return a vector query tool and a summary tool for a single document."""
+
+    # Load the document and split it into nodes
+    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
+    splitter = SentenceSplitter(chunk_size=1024)
+    nodes = splitter.get_nodes_from_documents(documents)
+    vector_index = VectorStoreIndex(nodes)
+
+    def vector_query(
+        query: str,
+        page_numbers: Optional[List[str]] = None
+    ) -> str:
+        """Use to answer questions over a given paper.
+
+        Useful if you have specific questions over the paper.
+        Always leave page_numbers as None UNLESS there is a specific page you want to search for.
+
+        Args:
+            query (str): the string query to be embedded.
+            page_numbers (Optional[List[str]]): Filter by a set of pages. Leave as None
+                to perform a vector search over all pages; otherwise, filter by the
+                specified pages.
+        """
+
+        page_numbers = page_numbers or []
+        metadata_dicts = [
+            {"key": "page_label", "value": p} for p in page_numbers
+        ]
+
+        query_engine = vector_index.as_query_engine(
+            similarity_top_k=2,
+            filters=MetadataFilters.from_dicts(
+                metadata_dicts,
+                condition=FilterCondition.OR
+            )
+        )
+        response = query_engine.query(query)
+        return str(response)
+
+    vector_query_tool = FunctionTool.from_defaults(
+        name=f"vector_tool_{name}",
+        fn=vector_query
+    )
+
+    summary_index = SummaryIndex(nodes)
+    summary_query_engine = summary_index.as_query_engine(
+        response_mode="tree_summarize",
+        use_async=True,
+    )
+    summary_tool = QueryEngineTool.from_defaults(
+        name=f"summary_tool_{name}",
+        query_engine=summary_query_engine,
+        description=(
+            f"Useful for summarization questions related to {name}"
+        ),
+    )
+
+    return vector_query_tool, summary_tool
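
For reference, a minimal sketch of exercising get_doc_tools on one of the committed papers; the file choice and questions are illustrative only, and it assumes OPENAI_API_KEY is available via helper.py.

# Hypothetical usage sketch for utils.get_doc_tools
from utils import get_doc_tools

vector_tool, summary_tool = get_doc_tools("Ehlers-Danlos-1/2024_EDS_6.pdf", "2024_EDS_6")

# The summary tool wraps a tree-summarize query engine over the whole document
print(summary_tool.query_engine.query("Summarize the key findings of this paper."))

# The vector tool wraps the vector_query function defined above
print(vector_tool(query="What symptoms are described in this paper?"))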