add files
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +1 -0
- Dockerfile +38 -0
- README-orig.md +16 -0
- README.md +7 -6
- backend/.DS_Store +0 -0
- backend/__init__.py +0 -0
- backend/__pycache__/__init__.cpython-311.pyc +0 -0
- backend/app/__init__.py +0 -0
- backend/app/__pycache__/__init__.cpython-311.pyc +0 -0
- backend/app/__pycache__/auth.cpython-311.pyc +0 -0
- backend/app/__pycache__/graph.cpython-311.pyc +0 -0
- backend/app/__pycache__/main.cpython-311.pyc +0 -0
- backend/app/__pycache__/prompt.cpython-311.pyc +0 -0
- backend/app/__pycache__/webrtc.cpython-311.pyc +0 -0
- backend/app/agents/__initi__.py +0 -0
- backend/app/agents/__pycache__/medical.cpython-311.pyc +0 -0
- backend/app/agents/__pycache__/prompt.cpython-311.pyc +0 -0
- backend/app/agents/__pycache__/rag.cpython-311.pyc +0 -0
- backend/app/agents/__pycache__/supervisor.cpython-311.pyc +0 -0
- backend/app/agents/__pycache__/validation.cpython-311.pyc +0 -0
- backend/app/agents/__pycache__/verification.cpython-311.pyc +0 -0
- backend/app/agents/medical.py +55 -0
- backend/app/agents/prompt.py +38 -0
- backend/app/agents/rag.py +73 -0
- backend/app/agents/state/__init__.py +0 -0
- backend/app/agents/state/__pycache__/__init__.cpython-311.pyc +0 -0
- backend/app/agents/state/__pycache__/state.cpython-311.pyc +0 -0
- backend/app/agents/state/state.py +28 -0
- backend/app/agents/supervisor.py +22 -0
- backend/app/agents/verification.py +86 -0
- backend/app/auth.py +106 -0
- backend/app/chatbot.py +5 -0
- backend/app/graph.py +85 -0
- backend/app/main.py +22 -0
- backend/app/upload_pdf/__pycache__/ingest_documents.cpython-311.pyc +0 -0
- backend/app/upload_pdf/__pycache__/questions_agent.cpython-311.pyc +0 -0
- backend/app/upload_pdf/ingest_documents.py +83 -0
- backend/app/upload_pdf/questions_agent.py +49 -0
- backend/app/webrtc.py +83 -0
- backend/data/.DS_Store +0 -0
- backend/data/__init__.py +0 -0
- backend/data/__pycache__/__init__.cpython-311.pyc +0 -0
- backend/data/combined_forms/temp/ACTC-Patient-Packet.pdf +3 -0
- backend/data/preprocessing/.DS_Store +0 -0
- backend/data/preprocessing/__pycache__/ingest_documents.cpython-311.pyc +0 -0
- backend/data/preprocessing/__pycache__/questions_agent.cpython-311.pyc +0 -0
- backend/data/preprocessing/ingest_documents.py +83 -0
- backend/data/preprocessing/questions_agent.py +49 -0
- backend/data/preprocessing/vectorstore/.DS_Store +0 -0
- backend/data/preprocessing/vectorstore/__pycache__/get.cpython-311.pyc +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,38 @@
FROM python:3.11

# Install Node.js via the NodeSource LTS setup script
RUN apt-get update && apt-get install -y \
    curl \
    && curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - \
    && apt-get install -y nodejs \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN apt-get update && apt-get install -y npm

# Verify installation
RUN node --version
RUN npm --version
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app
COPY --chown=user . $HOME/app
# COPY does not expand ~, so spell the home directory out
COPY ./backend/requirements.txt $HOME/app/requirements.txt
RUN pip install -r requirements.txt
COPY . .

USER root
WORKDIR $HOME/app/frontend
RUN npm install
RUN npm audit fix
RUN npm run build
RUN chown -R user:user $HOME/app/frontend
RUN chown -R user:user $HOME/app/backend
USER user

# Change back to app directory
WORKDIR $HOME/app
EXPOSE 5173
EXPOSE 8080
CMD ["/bin/bash", "-c", "./run.sh"]
README-orig.md
ADDED
@@ -0,0 +1,16 @@
---
title: Chat Patient Intake
emoji: 📚
colorFrom: green
colorTo: indigo
sdk: docker
pinned: false
license: mit
app_port: 5173
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

```

```
README.md
CHANGED
@@ -1,11 +1,12 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Chat Patient Intake
+emoji: 📚
+colorFrom: green
+colorTo: indigo
 sdk: docker
 pinned: false
-
+license: mit
+app_port: 5173
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
backend/.DS_Store
ADDED
Binary file (6.15 kB)

backend/__init__.py
ADDED
File without changes

backend/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (234 Bytes)

backend/app/__init__.py
ADDED
File without changes

backend/app/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (193 Bytes)

backend/app/__pycache__/auth.cpython-311.pyc
ADDED
Binary file (6.3 kB)

backend/app/__pycache__/graph.cpython-311.pyc
ADDED
Binary file (5.5 kB)

backend/app/__pycache__/main.cpython-311.pyc
ADDED
Binary file (1.08 kB)

backend/app/__pycache__/prompt.cpython-311.pyc
ADDED
Binary file (854 Bytes)

backend/app/__pycache__/webrtc.cpython-311.pyc
ADDED
Binary file (5.77 kB)

backend/app/agents/__initi__.py
ADDED
File without changes

backend/app/agents/__pycache__/medical.cpython-311.pyc
ADDED
Binary file (3.28 kB)

backend/app/agents/__pycache__/prompt.cpython-311.pyc
ADDED
Binary file (3.38 kB)

backend/app/agents/__pycache__/rag.cpython-311.pyc
ADDED
Binary file (4.67 kB)

backend/app/agents/__pycache__/supervisor.cpython-311.pyc
ADDED
Binary file (1.6 kB)

backend/app/agents/__pycache__/validation.cpython-311.pyc
ADDED
Binary file (1.12 kB)

backend/app/agents/__pycache__/verification.cpython-311.pyc
ADDED
Binary file (5.66 kB)
backend/app/agents/medical.py
ADDED
@@ -0,0 +1,55 @@
import os
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langgraph.graph import END
from .prompt import SYSTEM_PROMPT, CONTEXT_PROMPT, QUESTION_PROMPT

load_dotenv()
MODEL = os.getenv("MODEL")

@tool
def user_query(query: str):
    """
    Call this tool to retrieve the context of the conversation for the user's query, which is an unambiguous and concise query with enough context from the message history.
    """
    return query

@tool
def completed(**kwargs):
    """
    Call this tool when all medical questions have been completed.
    """
    return True

tools_by_name = {
    "user_query": user_query,
    "completed": completed
}

def medical_route(state):
    if not state["messages"]:
        return END
    last_message = state["messages"][-1]
    if last_message.tool_calls:
        return "rag_tool_node"
    else:
        return END

class MedicalQuestionAgent:
    def __init__(self, questions=None):
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", SYSTEM_PROMPT),
            ("system", QUESTION_PROMPT),
            ("system", CONTEXT_PROMPT),
            MessagesPlaceholder(variable_name="messages")
        ])
        self.llm = ChatOpenAI(model=MODEL, temperature=0, streaming=True)
        self.chain = self.prompt | self.llm.bind_tools([user_query, completed])
        self.questions = questions or []  # avoid a mutable default argument

    def __call__(self, state):
        results = self.chain.invoke({**state, "questions": self.questions})
        return {**state, "messages": [results]}
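For reference, a minimal sketch of driving this agent outside the graph. Assumptions: OPENAI_API_KEY and MODEL are set in the environment, the package is importable as backend.app.agents.medical, and the two questions are hypothetical.

# Standalone sketch of MedicalQuestionAgent (hypothetical questions).
from langchain_core.messages import HumanMessage
from backend.app.agents.medical import MedicalQuestionAgent, medical_route

agent = MedicalQuestionAgent(questions=[
    "Do you have any allergies to medications?",
    "Are you currently taking any prescription medications?",
])
state = {"messages": [HumanMessage(content="Hi, I'm ready to start.")]}
result = agent(state)

# medical_route inspects the newest message: a tool call routes to
# "rag_tool_node"; otherwise the turn ends and the reply goes to the user.
print(medical_route(result))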
backend/app/agents/prompt.py
ADDED
@@ -0,0 +1,38 @@
SYSTEM_PROMPT = """
You are a helpful, empathetic and polite medical receptionist named Jane, assisting with basic administrative tasks. Introduce yourself as Jane in your first message and announce your purpose, which is to obtain information from the patient for an upcoming appointment. You are to be concise and stay on track in obtaining information from the patient. You are not a medical professional, so if a patient asks a medical question, you must politely remind them that you cannot provide medical advice or diagnoses. You should always encourage the patient to speak directly with their healthcare provider for medical concerns. You are to always ask the patient one question at a time and wait for the patient's response before asking the next question. Assume the patient has a high level of literacy and is able to understand and respond to your questions.
"""


VERIFICATION_PROMPT = """
You are an expert in verification and validation. Given a list of fields, you are to retrieve information from the user for those fields. You also have the correct answers to these fields, allowing you to check if the user's responses are correct. You are not to repeat the same question or ask a question that has already been answered by the user.

For fields that involve dates:

1. Parsing the date: Accept the user's date input in any common format (e.g., "Jan 1 1990", "1st January 1990", "1990/01/01").
2. Converting to ISO 8601: Convert the parsed date into the yyyy-MM-dd format.
3. Comparison: Compare the converted date with the correct answer you have.

If the user's date input, after conversion, matches the correct answer, consider it correct. If not, proceed as follows:
- Invalid response handling: Call the 'invalid' tool without mentioning any correct values or hints.

For non-date fields:

- If the user's response is incorrect, call the 'invalid' tool with the value for the corresponding field you are verifying.

Additional guidelines:

- Never reveal any of the correct answers to the user unless they have already provided that exact value.
- If you have successfully verified all fields, call the 'completed' tool.
- Never end with a farewell or goodbye.
"""

QUESTION_PROMPT = """\
You are an expert in asking questions to the patient. You are to ask one question at a time and wait for the patient's response before asking the next question. You are not to repeat the same question unless the user has not answered it correctly or fully. Once all questions are answered, call the 'completed' tool.

QUESTIONS:
{questions}

"""

CONTEXT_PROMPT = """\
In addition to asking the patient questions, you are a friendly assistant that helps users answer their questions or responds to their comments. Only answer questions or respond to comments using the context and message history. Do not make up information. Any query by the user that you cannot answer must trigger a call to the 'user_query' function to retrieve the context for the user's query. If the user had questions or a query, do not jump back to asking intake questions until you confirm that their questions have been answered.
"""
backend/app/agents/rag.py
ADDED
@@ -0,0 +1,73 @@
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langchain_openai import ChatOpenAI
from operator import itemgetter
from langchain.schema.runnable import RunnablePassthrough
from .medical import tools_by_name

SYSTEM_PROMPT = """\
You are an expert in answering questions succinctly and correctly, using only the context and chat history. Answer the user's last question in the chat history and look at the message history for further context if needed. If you are not able to answer the user's last question based on the context, reply with "I don't know". Never make up an answer.
"""

CONTEXT_PROMPT = """\
Context:
{context}

Chat History:
{chat_history}
"""

def map_messages(messages):
    text = ""
    for message in messages:
        if isinstance(message, HumanMessage):
            text += f"Human: {message.content}\n"
        elif isinstance(message, AIMessage):
            text += f"AI: {message.content}\n"
    return text

class RAGTool:
    def __init__(self, llm: ChatOpenAI, retriever=None):
        self.retriever = retriever
        self.llm = llm
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", SYSTEM_PROMPT),
            ("user", CONTEXT_PROMPT)
        ])

        self.llm_chain = (
            {"context": itemgetter("question") | retriever, "chat_history": itemgetter("question")}
            | RunnablePassthrough.assign(context=itemgetter("context"))
            | {"response": self.prompt | self.llm, "context": itemgetter("context")}
        )

    def __call__(self, state):
        last_message = state["messages"][-1]
        messages = []
        for tool_call in last_message.tool_calls:
            if tool_call["name"] == "user_query":
                # Run the model-supplied query against the retriever and hand
                # the retrieved documents back as the tool's output.
                query = tools_by_name[tool_call["name"]].invoke({**tool_call["args"]})
                response = self.retriever.invoke(query)
                messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"],
                                            content=f"Context:\n{response}"))
            elif tool_call["name"] == "completed":
                state["next"] += 1
                messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content="Medical intake complete. Tell the user or patient that we are done with the intake process. Give them a professional and friendly farewell and mention looking forward to seeing them at the appointment."))
            else:
                messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=""))

        return {**state, "messages": messages}

    '''
    user_input = state["question"]
    result = self.llm_chain.invoke(
        {"chat_history": map_messages(state["messages"]) + f'Human: {user_input}'})
    ai_message = result["response"]
    context = result["context"]
    return {**state, "messages": [ai_message], "context": context}
    '''
backend/app/agents/state/__init__.py
ADDED
File without changes

backend/app/agents/state/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (251 Bytes)

backend/app/agents/state/__pycache__/state.cpython-311.pyc
ADDED
Binary file (1.88 kB)
backend/app/agents/state/state.py
ADDED
@@ -0,0 +1,28 @@
from typing_extensions import TypedDict
from typing import Annotated
from langgraph.graph import add_messages

class IdentificationState(TypedDict):
    messages: Annotated[list, add_messages]
    fields: str
    values: str
    counter: int

class IntakeRAGState(TypedDict):
    messages: Annotated[list, add_messages]
    question: str  # current user input; it may or may not be a 'question'
    context: str

class SupervisorState(TypedDict):
    # messages: Annotated[list, add_messages]
    next: int  # index of the next step in the workflow

class GraphState(TypedDict):
    messages: Annotated[list, add_messages]
    fields: str
    values: str
    counter: int
    question: str  # current user input; it may or may not be a 'question'
    context: str
    completed: str
    next: int
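A small sketch of how the add_messages reducer on these states behaves: a node that returns {"messages": [new_msg]} appends to the accumulated list rather than replacing it (assuming langchain-core and langgraph are installed as elsewhere in this diff).

from langchain_core.messages import AIMessage, HumanMessage
from langgraph.graph import add_messages

existing = [HumanMessage(content="Hi, I'm here for my appointment.")]
update = [AIMessage(content="Hello! I'm Jane. May I have your full name?")]

# The reducer merges the two lists, appending new messages by id.
merged = add_messages(existing, update)
print(len(merged))  # 2 -- the AI reply was appended, not substituted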
backend/app/agents/supervisor.py
ADDED
@@ -0,0 +1,22 @@
from langgraph.graph import END

class SupervisorAgent:
    def __init__(self):
        self.order = ['verification_agent', 'medical_agent']
        self.next = 0

    def __call__(self, state):
        # Seed the state keys the downstream agents expect.
        if not state.get('completed'):
            state["completed"] = ""
        if not state.get('next'):
            state["next"] = 0
        if not state.get('context'):
            state["context"] = ""

        return state

    def route(self, state):
        # state["next"] tracks progress through self.order; once every
        # stage has completed, end the workflow.
        if state["next"] >= len(self.order):
            return END
        return self.order[state["next"]]
backend/app/agents/verification.py
ADDED
@@ -0,0 +1,86 @@
import os
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage
from langgraph.graph import END
from .prompt import VERIFICATION_PROMPT, SYSTEM_PROMPT

load_dotenv()
MODEL = os.getenv("MODEL")

@tool
def invalid(field: str, value: str, counter=1):
    """
    Call this tool if the user's response is not valid for one of the fields you are verifying.
    """
    if counter >= 2:
        return f"The user's response for {field} with value {value} is not valid. Politely end the conversation and then ask them to call the support number."
    else:
        return f"The user's response for {field} with value {value} is not valid. Indicate to the user that it does not match our records. Please ask the user one more time."

@tool
def completed(**kwargs):
    """
    Call this tool when verification is complete and successful.
    """
    return "The verification is complete. Moving on to medical questions."

tools_by_name = {
    "invalid": invalid,
    "completed": completed
}

def verification_route(state):
    if not state["messages"]:
        return END
    last_message = state["messages"][-1]
    if last_message.tool_calls:
        return "verification_tool_node"
    else:
        return END

class VerificationAgent:
    def __init__(self):
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", SYSTEM_PROMPT),
            ("system", VERIFICATION_PROMPT),
            ("system", "Fields:{fields}"),
            ("system", "Values:{values}"),
            MessagesPlaceholder(variable_name="messages")
        ])
        self.llm = ChatOpenAI(model=MODEL, temperature=0, streaming=True)
        self.chain = self.prompt | self.llm.bind_tools([invalid, completed])

    def __call__(self, state):
        result = self.chain.invoke(state)
        if not state.get("counter") or not result.tool_calls:
            state["counter"] = 0
        return {**state, "messages": [result]}

def process_tool(state):
    last_message = state["messages"][-1]
    state["counter"] = state.get("counter") + 1
    messages = []
    for tool_call in last_message.tool_calls:
        if tool_call["name"] == "invalid":
            message = tools_by_name[tool_call["name"]].invoke({**tool_call["args"], "counter": state["counter"]})
            if state["counter"] >= 2:
                # Second failed attempt: reset the counter and relay the
                # tool's "end the conversation" message.
                state["counter"] = 0
                messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=message))
            else:
                state["counter"] += 1
                messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=f"The user's response for {tool_call['args']['field']} is not valid. Indicate to the user that it does not match our records. Please ask the user one more time."))
        elif tool_call["name"] == "completed":
            state["next"] += 1
            messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content="Verification complete. Prompt the user that we are moving on to medical questions. Do not end with a farewell. Mention that during the next stage the patient can ask any questions they have."))
        else:
            messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=""))
    return {**state, "messages": messages}
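A quick sketch of the two 'invalid' tool branches; the counter is supplied by process_tool, not by the model. The import path and field values are assumptions for illustration.

from backend.app.agents.verification import invalid

# First mismatch: the patient is asked to try again.
print(invalid.invoke({"field": "birthdate", "value": "1991-01-01", "counter": 1}))

# Second mismatch: the conversation is politely ended.
print(invalid.invoke({"field": "birthdate", "value": "1991-01-01", "counter": 2}))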
backend/app/auth.py
ADDED
@@ -0,0 +1,106 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from jose import JWTError, jwt
from passlib.context import CryptContext
from pydantic import BaseModel
from datetime import datetime, timedelta, timezone
import sqlite3
import os

SECRET_KEY = "your-secret-key"  # placeholder; load from the environment in production
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 300

pwd_context = CryptContext(schemes=["django_pbkdf2_sha256"], deprecated="auto")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

auth_router = APIRouter()

class User(BaseModel):
    username: str

class Token(BaseModel):
    access_token: str
    token_type: str

# SQLite setup
DB_NAME = "users.db"

def init_db():
    conn = sqlite3.connect(DB_NAME)
    cur = conn.cursor()
    cur.execute('''CREATE TABLE IF NOT EXISTS users
                   (username TEXT PRIMARY KEY, hashed_password TEXT)''')

    # Add a sample user if it doesn't exist
    cur.execute("INSERT OR IGNORE INTO users VALUES (?, ?)",
                ("admin", pwd_context.hash("admin")))

    conn.commit()
    conn.close()

# Initialize the database
init_db()

def verify_password(plain_password, hashed_password):
    return pwd_context.verify(plain_password, hashed_password)

def get_user(username: str):
    conn = sqlite3.connect(DB_NAME)
    cur = conn.cursor()
    cur.execute("SELECT * FROM users WHERE username=?", (username,))
    user = cur.fetchone()
    conn.close()
    if user:
        return User(username=user[0])
    return None

def authenticate_user(username: str, password: str):
    conn = sqlite3.connect(DB_NAME)
    cur = conn.cursor()
    cur.execute("SELECT * FROM users WHERE username=?", (username,))
    user = cur.fetchone()
    conn.close()
    if not user:
        return False
    if not verify_password(password, user[1]):
        return False
    return User(username=user[0])

def create_access_token(data: dict):
    to_encode = data.copy()
    expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
    return encoded_jwt

@auth_router.post("/token", response_model=Token)
async def login(form_data: OAuth2PasswordRequestForm = Depends()):
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )
    access_token = create_access_token(data={"sub": user.username})
    return {"access_token": access_token, "token_type": "bearer"}

async def get_current_user(token: str = Depends(oauth2_scheme)):
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
        username: str = payload.get("sub")
        if username is None:
            raise credentials_exception
        user = get_user(username)
        if user is None:
            raise credentials_exception
        return user
    except JWTError:
        raise credentials_exception
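A minimal sketch of exercising the /token endpoint locally, using the requests library (an assumption; it does not appear in this diff) and assuming the app from main.py is running on port 8080 with the admin/admin user seeded by init_db().

import requests

resp = requests.post(
    "http://localhost:8080/token",
    data={"username": "admin", "password": "admin"},  # OAuth2 form fields
)
resp.raise_for_status()
token = resp.json()["access_token"]

# The bearer token then authorizes the protected WebRTC routes.
headers = {"Authorization": f"Bearer {token}"}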
backend/app/chatbot.py
ADDED
@@ -0,0 +1,5 @@
import asyncio

from graph import run, graph

if __name__ == "__main__":
    app = graph.compile()
    asyncio.run(run(app))  # run() is a coroutine, so it must be awaited
backend/app/graph.py
ADDED
@@ -0,0 +1,85 @@
import os
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
# from .prompt import SYSTEM_PROMPT
import asyncio
from .agents.supervisor import SupervisorAgent
from .agents.verification import VerificationAgent, process_tool, verification_route
from .agents.medical import MedicalQuestionAgent, medical_route
from .agents.rag import RAGTool
from .agents.state.state import GraphState
from data.preprocessing.vectorstore.get import retriever
from langchain_openai import ChatOpenAI
from .upload_pdf.ingest_documents import PDFProcessor


pdf_processor = PDFProcessor(file_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data', 'combined_forms', 'temp', 'ACTC-Patient-Packet.pdf')))
questions = pdf_processor.extract_questions()
questions = [q.content for q in questions]
memory = MemorySaver()

graph = StateGraph(GraphState)

supervisor = SupervisorAgent()
graph.add_node("supervisor_agent", supervisor)
graph.add_node("verification_agent", VerificationAgent())
graph.add_node("verification_tool_node", process_tool)
graph.add_node("medical_agent", MedicalQuestionAgent(questions=questions))
graph.add_node("rag_tool_node", RAGTool(retriever=retriever,
                                        llm=ChatOpenAI(model=os.environ["MODEL"])))

graph.set_entry_point("supervisor_agent")

graph.add_edge("verification_tool_node", "verification_agent")
graph.add_edge("rag_tool_node", "medical_agent")
graph.add_conditional_edges(
    'supervisor_agent',
    supervisor.route
)
graph.add_conditional_edges(
    "verification_agent",
    verification_route,
    {"__end__": END, "verification_tool_node": "verification_tool_node"}
)
graph.add_conditional_edges(
    "medical_agent",
    medical_route,
    {"__end__": END, "rag_tool_node": "rag_tool_node"}
)


async def run_verification(app, fields="", values=""):
    config = {"configurable": {"thread_id": 1}}

    _input = input('User: ')
    while _input != 'quit':
        async for event in app.astream_events({"messages": [('user', _input)], "fields": "full name, birthdate", "values": "John Doe, 1990-01-01"}, config=config, version="v2"):
            if event['event'] == "on_chat_model_stream":
                data = event["data"]
                if data["chunk"].content:
                    print(data["chunk"].content.replace("\n", ""), end="", flush=True)

        _input = input('\nUser: ')


async def run(app):
    from langchain_core.messages import AIMessageChunk, HumanMessage
    config = {"configurable": {"thread_id": 1}}
    _user_input = input("User: ")

    while _user_input != "quit":
        out = ""
        astream = app.astream({"messages": [HumanMessage(content=_user_input)], "fields": "full name, birthdate", "values": "John Doe, 1990-01-01"}, config=config, stream_mode="messages")
        async for msg, metadata in astream:
            if isinstance(msg, AIMessageChunk):
                out += msg.content
        print('Assistant: ', out)
        _user_input = input("User: ")


if __name__ == "__main__":
    app = graph.compile(checkpointer=memory)
    asyncio.run(run(app))
backend/app/main.py
ADDED
@@ -0,0 +1,22 @@
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .auth import auth_router
from .webrtc import webrtc_router

load_dotenv()
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(auth_router)
app.include_router(webrtc_router)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)
backend/app/upload_pdf/__pycache__/ingest_documents.cpython-311.pyc
ADDED
Binary file (5.01 kB)

backend/app/upload_pdf/__pycache__/questions_agent.cpython-311.pyc
ADDED
Binary file (3.15 kB)
backend/app/upload_pdf/ingest_documents.py
ADDED
@@ -0,0 +1,83 @@
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from dotenv import load_dotenv
from .questions_agent import workflow

load_dotenv()

CHUNK_SIZE = 500
CHUNK_OVERLAP = 200

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

# Step 1: Parse the PDF and Extract Questions
class PDFProcessor:
    def __init__(self, file_path):
        self.file_path = file_path
        self.text = ""
        self.docs = PyMuPDFLoader(self.file_path).load()

    def extract_text(self):
        for doc in self.docs:
            self.text += doc.page_content
        return self.text

    def extract_questions(self):
        questions = []
        chunks = text_splitter.split_text(self.extract_text())

        config = {"configurable": {"thread_id": 1}}
        # state = workflow.get_state(config=config).values
        question_sets = workflow.batch(config=config, inputs=[{"context": chunk, "previous_questions": []} for chunk in chunks])

        for item in question_sets:
            questions.extend(item.get("previous_questions", []))

        return questions


# Step 2: Split Questions and Prepare for Vector Database
class QuestionIngestor:
    def __init__(self, questions):
        self.questions = questions

    def split_questions(self):
        # Using a Text Splitter to handle long questions
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=20
        )
        docs = splitter.create_documents(self.questions)
        return docs

# Step 3: Setup Qdrant Vector Store and Index Data
class QdrantSetup:
    def __init__(self, questions):
        self.questions = questions
        self.qdrant_client = QdrantClient("localhost", port=6333)
        self.embedding = OpenAIEmbeddings()

    def setup_qdrant(self):
        # Create a Qdrant collection for questions
        self.qdrant_client.recreate_collection(
            collection_name="questions",
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )

    def index_questions(self):
        # Index questions into Qdrant
        qdrant_vectorstore = Qdrant(
            client=self.qdrant_client,
            collection_name="questions",
            embedding=self.embedding
        )
        qdrant_vectorstore.add_documents(self.questions)
# USE Parent Document Embedding for categorization
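A minimal sketch wiring the three steps together. Assumptions: a Qdrant server is listening on localhost:6333, OPENAI_API_KEY is set, the package is importable as backend.app.upload_pdf.ingest_documents, and the PDF path is the packet committed under backend/data/combined_forms/temp/.

from backend.app.upload_pdf.ingest_documents import PDFProcessor, QuestionIngestor, QdrantSetup

processor = PDFProcessor("backend/data/combined_forms/temp/ACTC-Patient-Packet.pdf")
questions = [q.content for q in processor.extract_questions()]

ingestor = QuestionIngestor(questions)
docs = ingestor.split_questions()

# index_questions() calls add_documents(), which expects Document objects,
# so QdrantSetup is given the split documents rather than raw strings.
store = QdrantSetup(docs)
store.setup_qdrant()
store.index_questions()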
backend/app/upload_pdf/questions_agent.py
ADDED
@@ -0,0 +1,49 @@
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.graph import StateGraph
from typing_extensions import Annotated, TypedDict
from langgraph.graph import add_messages, END
from langgraph.checkpoint.memory import MemorySaver
from dotenv import load_dotenv

load_dotenv()

llm = ChatOpenAI(model="gpt-4o", temperature=0)
memory = MemorySaver()

class State(TypedDict):
    previous_questions: Annotated[list, add_messages]
    context: str

prompt = ChatPromptTemplate.from_template(
    """
    You are an expert at ingesting documents and creating questions for a medical questionnaire to be answered by patients with a high school level education. Given the following context, which should contain medical questions, extract from only this context all medical questions, separated by '|', that would be appropriate for a patient to answer. Indicate if a question is multiple choice and include the possible choices. If there are no medical questions in the context, output 'None'.

    Context:
    {context}
    """
)

def create_questions(state):
    results = (prompt | llm | StrOutputParser()).invoke(state)
    questions = results.split("|")

    questions = [q for q in questions if q and q != 'None']
    return {"previous_questions": questions, "context": state.get("context", "") or ''}

graph = StateGraph(State)

graph.add_node("create_questions", create_questions)
graph.set_entry_point("create_questions")
graph.add_edge("create_questions", END)
workflow = graph.compile(checkpointer=memory)
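A quick sketch of running the extraction workflow on a single chunk of text. The context string is hypothetical, the import path is an assumption, and a thread_id is required because the graph is compiled with a checkpointer.

from backend.app.upload_pdf.questions_agent import workflow

config = {"configurable": {"thread_id": "demo"}}
result = workflow.invoke(
    {"context": "Do you smoke? Yes / No | Have you had surgery in the past year?",
     "previous_questions": []},
    config=config,
)
# add_messages stores each extracted question as a message in the state.
print(result["previous_questions"])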
backend/app/webrtc.py
ADDED
@@ -0,0 +1,83 @@
import os
from dotenv import load_dotenv
from fastapi import APIRouter, Depends, WebSocket
from fastapi.responses import JSONResponse
from .auth import get_current_user
from aiortc import RTCPeerConnection, RTCSessionDescription
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import HumanMessage, RemoveMessage, AIMessageChunk
from langgraph.checkpoint.memory import MemorySaver
from .graph import graph
from typing import Dict

load_dotenv()

memory = MemorySaver()
app = graph.compile(checkpointer=memory)

webrtc_router = APIRouter()

# Initialize OpenAI
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert in asking questions. Your goal is to ask for the full name, age, and address of a person."),
    MessagesPlaceholder(variable_name="messages")
])

chain = prompt | llm  # remove this
user_memories: Dict[str, ConversationBufferMemory] = {}  # remove this


@webrtc_router.post("/webrtc/offer")
async def webrtc_offer(offer: dict, current_user: dict = Depends(get_current_user)):
    pc = RTCPeerConnection()
    offer_obj = RTCSessionDescription(sdp=offer["sdp"], type=offer["type"])

    await pc.setRemoteDescription(offer_obj)
    answer = await pc.createAnswer()
    await pc.setLocalDescription(answer)

    # Create a new memory for the user if it doesn't exist
    if current_user.username not in user_memories:
        user_memories[current_user.username] = ConversationBufferMemory(
            return_messages=True)

    @pc.on("datachannel")
    def on_datachannel(channel):
        @channel.on("message")
        async def on_message(message):
            # Process the message using LangChain
            memory = user_memories[current_user.username]
            user_message = HumanMessage(content=message)

            memory.chat_memory.add_user_message(user_message)

            config = {"configurable": {"thread_id": current_user.username}}

            astream = app.astream({"messages": [user_message], "fields": "full name, birthdate", "values": "John Doe, 1990-01-01"}, config=config, stream_mode="messages")
            async for msg, metadata in astream:
                if isinstance(msg, AIMessageChunk):
                    channel.send(msg.content)

    return JSONResponse(content={
        "sdp": pc.localDescription.sdp,
        "type": pc.localDescription.type
    })


@webrtc_router.post("/webrtc/ice-candidate")
async def webrtc_ice_candidate(candidate: dict, current_user: dict = Depends(get_current_user)):
    # In a real-world scenario, you'd store and forward this candidate to the other peer
    return JSONResponse(content={"status": "success"})


@webrtc_router.post("/webrtc/clear_memory")
async def webrtc_clear_memory(obj: dict, current_user: dict = Depends(get_current_user)):
    config = {"configurable": {"thread_id": current_user.username}}
    state = app.get_state(config=config)
    messages = state.values.get("messages", [])
    for message in messages:
        app.update_state(config, {"messages": RemoveMessage(id=message.id)})
    return JSONResponse(content={"status": "success"})
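A hedged sketch of calling the memory-reset endpoint over HTTP with a bearer token, again using the requests library (an assumption) and the locally running app from main.py with the seeded admin/admin user.

import requests

BASE = "http://localhost:8080"  # assumption: main.py running locally

tok = requests.post(f"{BASE}/token", data={"username": "admin", "password": "admin"}).json()
headers = {"Authorization": f"Bearer {tok['access_token']}"}

# Reset the per-user LangGraph thread; the handler removes every stored message.
resp = requests.post(f"{BASE}/webrtc/clear_memory", json={}, headers=headers)
print(resp.json())  # {"status": "success"}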
backend/data/.DS_Store
ADDED
Binary file (6.15 kB)

backend/data/__init__.py
ADDED
File without changes

backend/data/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (239 Bytes)
backend/data/combined_forms/temp/ACTC-Patient-Packet.pdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a260c610ba55458698fb28200aa6b74273c462d94a3f253cd545b9036f97406f
size 607119
backend/data/preprocessing/.DS_Store
ADDED
Binary file (6.15 kB)

backend/data/preprocessing/__pycache__/ingest_documents.cpython-311.pyc
ADDED
Binary file (6.67 kB)

backend/data/preprocessing/__pycache__/questions_agent.cpython-311.pyc
ADDED
Binary file (3.15 kB)
backend/data/preprocessing/ingest_documents.py
ADDED
@@ -0,0 +1,83 @@
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from dotenv import load_dotenv
from .questions_agent import workflow

load_dotenv()

CHUNK_SIZE = 500
CHUNK_OVERLAP = 200

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

# Step 1: Parse the PDF and Extract Questions
class PDFProcessor:
    def __init__(self, file_path):
        self.file_path = file_path
        self.text = ""
        self.docs = PyMuPDFLoader(self.file_path).load()

    def extract_text(self):
        for doc in self.docs:
            self.text += doc.page_content
        return self.text

    def extract_questions(self):
        questions = []
        chunks = text_splitter.split_text(self.extract_text())

        config = {"configurable": {"thread_id": 1}}
        # state = workflow.get_state(config=config).values
        question_sets = workflow.batch(config=config, inputs=[{"context": chunk, "previous_questions": []} for chunk in chunks])

        for item in question_sets:
            questions.extend(item.get("previous_questions", []))

        return questions


# Step 2: Split Questions and Prepare for Vector Database
class QuestionIngestor:
    def __init__(self, questions):
        self.questions = questions

    def split_questions(self):
        # Using a Text Splitter to handle long questions
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=20
        )
        docs = splitter.create_documents(self.questions)
        return docs

# Step 3: Setup Qdrant Vector Store and Index Data
class QdrantSetup:
    def __init__(self, questions):
        self.questions = questions
        self.qdrant_client = QdrantClient("localhost", port=6333)
        self.embedding = OpenAIEmbeddings()

    def setup_qdrant(self):
        # Create a Qdrant collection for questions
        self.qdrant_client.recreate_collection(
            collection_name="questions",
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )

    def index_questions(self):
        # Index questions into Qdrant
        qdrant_vectorstore = Qdrant(
            client=self.qdrant_client,
            collection_name="questions",
            embedding=self.embedding
        )
        qdrant_vectorstore.add_documents(self.questions)
# USE Parent Document Embedding for categorization
backend/data/preprocessing/questions_agent.py
ADDED
@@ -0,0 +1,49 @@
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.graph import StateGraph
from typing_extensions import Annotated, TypedDict
from langgraph.graph import add_messages, END
from langgraph.checkpoint.memory import MemorySaver
from dotenv import load_dotenv

load_dotenv()

llm = ChatOpenAI(model="gpt-4o", temperature=0)
memory = MemorySaver()

class State(TypedDict):
    previous_questions: Annotated[list, add_messages]
    context: str

prompt = ChatPromptTemplate.from_template(
    """
    You are an expert at ingesting documents and creating questions for a medical questionnaire to be answered by patients with a high school level education. Given the following context, which should contain medical questions, extract from only this context all medical questions, separated by '|', that would be appropriate for a patient to answer. Indicate if a question is multiple choice and include the possible choices. If there are no medical questions in the context, output 'None'.

    Context:
    {context}
    """
)

def create_questions(state):
    results = (prompt | llm | StrOutputParser()).invoke(state)
    questions = results.split("|")

    questions = [q for q in questions if q and q != 'None']
    return {"previous_questions": questions, "context": state.get("context", "") or ''}

graph = StateGraph(State)

graph.add_node("create_questions", create_questions)
graph.set_entry_point("create_questions")
graph.add_edge("create_questions", END)
workflow = graph.compile(checkpointer=memory)
backend/data/preprocessing/vectorstore/.DS_Store
ADDED
Binary file (6.15 kB)

backend/data/preprocessing/vectorstore/__pycache__/get.cpython-311.pyc
ADDED
Binary file (1.36 kB)