Upload 20 files
- .gitattributes +6 -0
- Ehlers-Danlos-1/2024_EDS_1.pdf +0 -0
- Ehlers-Danlos-1/2024_EDS_2.pdf +3 -0
- Ehlers-Danlos-1/2024_EDS_3.pdf +3 -0
- Ehlers-Danlos-1/2024_EDS_4.pdf +3 -0
- Ehlers-Danlos-1/2024_EDS_5.pdf +3 -0
- Ehlers-Danlos-1/2024_EDS_6.pdf +0 -0
- Ehlers-Danlos-1/2024_EDS_7.pdf +0 -0
- Ehlers-Danlos-1/Unknown_EDS_1.pdf +3 -0
- Ehlers-Danlos-1/Unknown_EDS_2.pdf +0 -0
- Ehlers-Danlos-1/Unknown_EDS_3.pdf +0 -0
- Ehlers-Danlos-1/Unknown_EDS_4.pdf +0 -0
- Ehlers-Danlos-1/Unknown_EDS_5.pdf +3 -0
- app.py +144 -0
- helper.py +13 -0
- rag.ipynb +222 -0
- ragas_eval.py +36 -0
- requirements.txt +14 -0
- test.py +26 -0
- tools_cache.pkl +3 -0
- utils.py +82 -0
.gitattributes
CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_2.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_3.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_4.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/Unknown_EDS_1.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/Unknown_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
Ehlers-Danlos-1/2024_EDS_1.pdf
ADDED
The diff for this file is too large to render.
Ehlers-Danlos-1/2024_EDS_2.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:46fc736ff4174473e0a846b7ca8430c140d89cd2c9f663e105bc48b33f8d9c99
size 2616000
Ehlers-Danlos-1/2024_EDS_3.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7fef5c8c375297158ad7ad63166405ca7ce4ac511371a8454fe9df972755b0fe
size 10344738
Ehlers-Danlos-1/2024_EDS_4.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:25db35c77fd6aeba6b15278671a462b30ffbb6f97eb5f221e0459f6d11c0f8ed
size 1071576
Ehlers-Danlos-1/2024_EDS_5.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:57ef98bcb445da6abda66de35204634bd81d8c6dcdf53bfc3be54447ec9ad0ad
size 2772421
Ehlers-Danlos-1/2024_EDS_6.pdf
ADDED
Binary file (146 kB).
Ehlers-Danlos-1/2024_EDS_7.pdf
ADDED
The diff for this file is too large to render.
Ehlers-Danlos-1/Unknown_EDS_1.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dbeaf13d3298a00bc1c7acfba3177a0c639f677e0f0941452709fe60542052d4
size 21553835
Ehlers-Danlos-1/Unknown_EDS_2.pdf
ADDED
Binary file (428 kB).
Ehlers-Danlos-1/Unknown_EDS_3.pdf
ADDED
Binary file (817 kB).
Ehlers-Danlos-1/Unknown_EDS_4.pdf
ADDED
Binary file (392 kB).
Ehlers-Danlos-1/Unknown_EDS_5.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5c5a77524b6bb4dca40798af5ff3e3c622216a13ac21a60d9befce255977b47a
size 1847313
app.py
ADDED
@@ -0,0 +1,144 @@
import os
import glob
from pathlib import Path
import gradio as gr
import nest_asyncio
import dill as pickle

# Ensure async compatibility in Jupyter
nest_asyncio.apply()

# Import OpenAI key with helper function
from helper import get_openai_api_key
OPENAI_API_KEY = get_openai_api_key()

# Define the path to the directory containing the PDF files
folder_path = 'Ehlers-Danlos-1'

# Get the list of all PDF files in the directory
pdf_files = glob.glob(os.path.join(folder_path, '*.pdf'))
print(pdf_files)

# Extract just the filenames (optional)
pdf_filenames = [os.path.basename(pdf) for pdf in pdf_files]
print(pdf_filenames)

# Import utilities
from utils import get_doc_tools

# Truncate function names if necessary
def truncate_function_name(name, max_length=64):
    return name if len(name) <= max_length else name[:max_length]

# Path to save/load serialized tools
tools_cache_path = 'tools_cache.pkl'

# Initialize paper_to_tools_dict
paper_to_tools_dict = {}

# Check if the cache file exists and is not empty
if os.path.exists(tools_cache_path) and os.path.getsize(tools_cache_path) > 0:
    try:
        with open(tools_cache_path, 'rb') as f:
            paper_to_tools_dict = pickle.load(f)
    except EOFError:
        print("Cache file is corrupted. Recreating tools.")
        paper_to_tools_dict = {}
else:
    print("Cache file does not exist or is empty. Recreating tools.")

# Create tools for each PDF if not loaded from cache
if not paper_to_tools_dict:
    for pdf in pdf_files:
        print(f"Getting tools for paper: {pdf}")
        vector_tool, summary_tool = get_doc_tools(pdf, Path(pdf).stem)
        paper_to_tools_dict[pdf] = [vector_tool, summary_tool]

    # Save tools to cache
    with open(tools_cache_path, 'wb') as f:
        pickle.dump(paper_to_tools_dict, f)


# Combine all tools into a single list
all_tools = [t for pdf in pdf_files for t in paper_to_tools_dict[pdf]]

# Define an object index and retriever over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)

obj_retriever = obj_index.as_retriever(similarity_top_k=3)

# Initialize the OpenAI LLM
from llama_index.llms.openai import OpenAI
llm = OpenAI(model="gpt-3.5-turbo")

# Set up the agent
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

agent_worker = FunctionCallingAgentWorker.from_tools(
    tool_retriever=obj_retriever,
    llm=llm,
    verbose=True
)
agent = AgentRunner(agent_worker)

# Define the function to query the agent
def ask_agent(question):
    response = agent.query(question)
    return str(response)

# Create the Gradio interface
iface = gr.Interface(
    fn=ask_agent,
    inputs="text",
    outputs="text",
    title="EDS Research Agent",
)

# Launch the Gradio app
iface.launch(share=True)

"""
import streamlit as st
from transformers import pipeline

# Load your model
generator = pipeline('text-generation', model='gpt-3.5-turbo')

# Streamlit interface
st.title("Text Generator")
prompt = st.text_input("Enter your prompt:")
if st.button("Generate"):
    result = generator(prompt, max_length=50)
    st.write(result[0]['generated_text'])

"""

"""
import gradio as gr
from transformers import pipeline

# Load your model
generator = pipeline('text-generation', model='gpt-3.5-turbo')

# Define the function to generate text
def generate_text(prompt):
    result = generator(prompt, max_length=50)
    return result[0]['generated_text']

# Create the Gradio interface
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="Text Generator")

# Launch the interface
iface.launch()
"""
"""
import torch
print(torch.__version__)
"""
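For quick local testing, a minimal sketch of exercising the agent without the web UI is shown below. The question text is taken from rag.ipynb; note that importing app also runs its module-level setup, including iface.launch(share=True).

# Minimal smoke test (sketch): import the module and call ask_agent directly.
# Importing app executes the index build (or cache load) and also launches Gradio,
# so this is only suitable for quick local experiments.
from app import ask_agent

print(ask_agent("Do people with EDS suffer from dislocations, and if so, how do they manifest?"))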
helper.py
ADDED
@@ -0,0 +1,13 @@
# Add your utilities or helper functions to this file.

import os
from dotenv import load_dotenv, find_dotenv

# These expect to find a .env file at the directory above the lesson. The format for that file is (without the comment): API_KEYNAME=AStringThatIsTheLongAPIKeyFromSomeService
def load_env():
    _ = load_dotenv(find_dotenv())

def get_openai_api_key():
    load_env()
    openai_api_key = os.getenv("OPENAI_API_KEY")
    return openai_api_key
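A small usage sketch for helper.py follows; the .env contents shown are placeholders, not a real key.

# .env (located where find_dotenv() can discover it), placeholder value:
# OPENAI_API_KEY=sk-your-key-here

from helper import get_openai_api_key

key = get_openai_api_key()   # loads .env, then reads OPENAI_API_KEY
print(key is not None)       # True if the variable was found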
rag.ipynb
ADDED
@@ -0,0 +1,222 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import OpenAI key with helper function\n",
    "from helper import get_openai_api_key\n",
    "\n",
    "OPENAI_API_KEY = get_openai_api_key()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#A lot of modules use async and we want them to be compatible with Jupyter notebook\n",
    "import nest_asyncio\n",
    "\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Ehlers-Danlos-1\\\\2024_EDS_1.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_2.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_3.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_4.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_5.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_6.pdf', 'Ehlers-Danlos-1\\\\2024_EDS_7.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_1.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_2.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_3.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_4.pdf', 'Ehlers-Danlos-1\\\\Unknown_EDS_5.pdf']\n",
      "['2024_EDS_1.pdf', '2024_EDS_2.pdf', '2024_EDS_3.pdf', '2024_EDS_4.pdf', '2024_EDS_5.pdf', '2024_EDS_6.pdf', '2024_EDS_7.pdf', 'Unknown_EDS_1.pdf', 'Unknown_EDS_2.pdf', 'Unknown_EDS_3.pdf', 'Unknown_EDS_4.pdf', 'Unknown_EDS_5.pdf']\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import glob\n",
    "\n",
    "# Define the path to the directory containing the PDF files\n",
    "folder_path = 'Ehlers-Danlos-1'\n",
    "\n",
    "# Get the list of all PDF files in the directory\n",
    "pdf_files = glob.glob(os.path.join(folder_path, '*.pdf'))\n",
    "print(pdf_files)\n",
    "\n",
    "# Extract just the filenames (optional)\n",
    "pdf_filenames = [os.path.basename(pdf) for pdf in pdf_files]\n",
    "\n",
    "# Print the list of PDF filenames\n",
    "print(pdf_filenames)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_1.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_2.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_3.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_4.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_5.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_6.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\2024_EDS_7.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_1.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_2.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_3.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_4.pdf\n",
      "Getting tools for paper: Ehlers-Danlos-1\\Unknown_EDS_5.pdf\n"
     ]
    }
   ],
   "source": [
    "from utils import get_doc_tools\n",
    "from pathlib import Path\n",
    "\n",
    "# Ensure function names are within the allowed length limit\n",
    "def truncate_function_name(name, max_length=64):\n",
    "    return name if len(name) <= max_length else name[:max_length]\n",
    "\n",
    "paper_to_tools_dict = {}\n",
    "for pdf in pdf_files:\n",
    "    print(f\"Getting tools for paper: {pdf}\")\n",
    "    vector_tool, summary_tool = get_doc_tools(pdf, Path(pdf).stem)\n",
    "    #vector_tool, summary_tool = get_doc_tools(pdf, truncate_function_name(Path(pdf).stem))\n",
    "    paper_to_tools_dict[pdf] = [vector_tool, summary_tool]\n",
    "    #print(vector_tool)\n",
    "    #print(summary_tool)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_tools = [t for pdf in pdf_files for t in paper_to_tools_dict[pdf]]\n",
    "#all_tools = [truncate_function_name(tool) for tool in all_tools]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define an \"object\" index and retriever over these tools\n",
    "from llama_index.core import VectorStoreIndex\n",
    "from llama_index.core.objects import ObjectIndex\n",
    "\n",
    "obj_index = ObjectIndex.from_objects(\n",
    "    all_tools,\n",
    "    index_cls=VectorStoreIndex,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj_retriever = obj_index.as_retriever(similarity_top_k=3)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.llms.openai import OpenAI\n",
    "\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.agent import FunctionCallingAgentWorker\n",
    "from llama_index.core.agent import AgentRunner\n",
    "\n",
    "agent_worker = FunctionCallingAgentWorker.from_tools(\n",
    "    tool_retriever=obj_retriever,\n",
    "    llm=llm, \n",
    "    verbose=True\n",
    ")\n",
    "agent = AgentRunner(agent_worker)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'agent' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43magent\u001b[49m\u001b[38;5;241m.\u001b[39mquery(\n\u001b[0;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDo people with EDS suffer from dislocations, and if so, how do they manifest?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mstr\u001b[39m(response))\n",
      "\u001b[1;31mNameError\u001b[0m: name 'agent' is not defined"
     ]
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "\n",
    "response = agent.query(\n",
    "    \"Do people with EDS suffer from dislocations, and if so, how do they manifest?\"\n",
    ")\n",
    "print(str(response))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
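The final notebook cell fails with "NameError: name 'agent' is not defined" because it ran in a fresh kernel (execution_count 1) after the kernel crashed, before the agent-construction cells were re-executed. A small guard, sketched below, makes that failure mode explicit rather than silent.

# Sketch: fail fast if the setup cells have not been run in the current kernel.
try:
    agent
except NameError as exc:
    raise RuntimeError("Run the agent-construction cells above before querying.") from exc

response = agent.query(
    "Do people with EDS suffer from dislocations, and if so, how do they manifest?"
)
print(str(response))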
ragas_eval.py
ADDED
@@ -0,0 +1,36 @@
import os
import sys
from helper import get_openai_api_key

venv_path = os.path.join(os.path.dirname(__file__), 'venv', 'Lib', 'python3.12', 'site-packages')
sys.path.append(venv_path)

os.environ["OPENAI_API_KEY"] = get_openai_api_key()

from langchain_community.document_loaders import DirectoryLoader
loader = DirectoryLoader("Ehlers-Danlos-1")
documents = loader.load()

for document in documents:
    document.metadata['filename'] = document.metadata['source']

from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# generator with openai models
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# generate testset
testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})
print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
print(testset)
testset.to_pandas()
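Once generation finishes, the testset can be inspected as a DataFrame. The sketch below only prints its shape, column names, and first rows, since the exact column set depends on the installed ragas version.

# Sketch: inspect the generated testset (column names vary by ragas version).
df = testset.to_pandas()
print(df.shape)
print(df.columns.tolist())
print(df.head())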
requirements.txt
ADDED
@@ -0,0 +1,14 @@
# requirements file
# note which revision of python, for example 3.9.6
# in this file, insert all the pip install needs, include revision


python-dotenv==1.0.0

llama-index==0.10.27
llama-index-llms-openai==0.1.15
llama-index-embeddings-openai==0.1.7

gradio
transformers
torch>=1.8.0
test.py
ADDED
@@ -0,0 +1,26 @@
import sys
import os

# Add the virtual environment's site-packages to sys.path
# Replace 'pythonX.Y' with your Python version, e.g., 'python3.8'
venv_path = os.path.join(os.path.dirname(__file__), 'venv', 'lib', 'site-packages')
sys.path.append(venv_path)

# Ensure the directory structure is recognized as a package
# You can verify by listing the contents of the directory
print("sys.path:", sys.path)
print("Contents of venv_path:", os.listdir(venv_path))

# Now import the TestsetGenerator
try:
    from ragas.testset.generator import TestsetGenerator
    print("Successfully imported TestsetGenerator.")
except ImportError as e:
    print("ImportError:", e)

# Use the imported function or class
try:
    generator = TestsetGenerator()
    print("Successfully created a TestsetGenerator instance.")
except Exception as e:
    print("Error creating TestsetGenerator instance:", e)
tools_cache.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7bd5c05da1d3423251cb117b519c5f46662199b6c15f4db4591e226f70a584d6
size 8897145
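tools_cache.pkl is the dill pickle that app.py writes after building the per-paper tools, and loads on subsequent runs. Deleting it forces a full rebuild on the next launch, as sketched below.

# Sketch: drop the cache so app.py regenerates and re-pickles the tools.
import os

if os.path.exists("tools_cache.pkl"):
    os.remove("tools_cache.pkl")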
utils.py
ADDED
@@ -0,0 +1,82 @@
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import SummaryIndex, VectorStoreIndex
from llama_index.core.tools import QueryEngineTool
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, SummaryIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import FunctionTool, QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters, FilterCondition
from typing import List, Optional



def get_doc_tools(
    file_path: str,
    name: str,
) -> str:
    """Get vector query and summary query tools from a document."""

    # load documents
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    def vector_query(
        query: str,
        page_numbers: Optional[List[str]] = None
    ) -> str:
        """Use to answer questions over a given paper.

        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.

        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.

        """

        page_numbers = page_numbers or []
        metadata_dicts = [
            {"key": "page_label", "value": p} for p in page_numbers
        ]

        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR
            )
        )
        response = query_engine.query(query)
        return response


    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query
    )

    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {name}"
        ),
    )

    return vector_query_tool, summary_tool
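A usage sketch for get_doc_tools on a single paper is shown below, mirroring the calls in rag.ipynb and app.py. The direct tool invocations assume the standard LlamaIndex tool __call__ interface, so the exact call style may differ between versions.

# Sketch: build the two tools for one PDF and invoke them directly
# (tool call style is an assumption; it is version-dependent).
from pathlib import Path
from utils import get_doc_tools

pdf = "Ehlers-Danlos-1/2024_EDS_2.pdf"
vector_tool, summary_tool = get_doc_tools(pdf, Path(pdf).stem)

# Targeted question over the paper, optionally restricted to specific pages.
print(vector_tool(query="What diagnostic criteria does this paper discuss?"))

# Whole-document summarization via the summary query engine.
print(summary_tool(input="Summarize the main findings of this paper."))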