richlai committed
Commit 8b1e853 · 1 Parent(s): fc7c417
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +1 -0
  2. Dockerfile +38 -0
  3. README-orig.md +16 -0
  4. README.md +7 -6
  5. backend/.DS_Store +0 -0
  6. backend/__init__.py +0 -0
  7. backend/__pycache__/__init__.cpython-311.pyc +0 -0
  8. backend/app/__init__.py +0 -0
  9. backend/app/__pycache__/__init__.cpython-311.pyc +0 -0
  10. backend/app/__pycache__/auth.cpython-311.pyc +0 -0
  11. backend/app/__pycache__/graph.cpython-311.pyc +0 -0
  12. backend/app/__pycache__/main.cpython-311.pyc +0 -0
  13. backend/app/__pycache__/prompt.cpython-311.pyc +0 -0
  14. backend/app/__pycache__/webrtc.cpython-311.pyc +0 -0
  15. backend/app/agents/__initi__.py +0 -0
  16. backend/app/agents/__pycache__/medical.cpython-311.pyc +0 -0
  17. backend/app/agents/__pycache__/prompt.cpython-311.pyc +0 -0
  18. backend/app/agents/__pycache__/rag.cpython-311.pyc +0 -0
  19. backend/app/agents/__pycache__/supervisor.cpython-311.pyc +0 -0
  20. backend/app/agents/__pycache__/validation.cpython-311.pyc +0 -0
  21. backend/app/agents/__pycache__/verification.cpython-311.pyc +0 -0
  22. backend/app/agents/medical.py +55 -0
  23. backend/app/agents/prompt.py +38 -0
  24. backend/app/agents/rag.py +73 -0
  25. backend/app/agents/state/__init__.py +0 -0
  26. backend/app/agents/state/__pycache__/__init__.cpython-311.pyc +0 -0
  27. backend/app/agents/state/__pycache__/state.cpython-311.pyc +0 -0
  28. backend/app/agents/state/state.py +28 -0
  29. backend/app/agents/supervisor.py +22 -0
  30. backend/app/agents/verification.py +86 -0
  31. backend/app/auth.py +106 -0
  32. backend/app/chatbot.py +5 -0
  33. backend/app/graph.py +85 -0
  34. backend/app/main.py +22 -0
  35. backend/app/upload_pdf/__pycache__/ingest_documents.cpython-311.pyc +0 -0
  36. backend/app/upload_pdf/__pycache__/questions_agent.cpython-311.pyc +0 -0
  37. backend/app/upload_pdf/ingest_documents.py +83 -0
  38. backend/app/upload_pdf/questions_agent.py +49 -0
  39. backend/app/webrtc.py +83 -0
  40. backend/data/.DS_Store +0 -0
  41. backend/data/__init__.py +0 -0
  42. backend/data/__pycache__/__init__.cpython-311.pyc +0 -0
  43. backend/data/combined_forms/temp/ACTC-Patient-Packet.pdf +3 -0
  44. backend/data/preprocessing/.DS_Store +0 -0
  45. backend/data/preprocessing/__pycache__/ingest_documents.cpython-311.pyc +0 -0
  46. backend/data/preprocessing/__pycache__/questions_agent.cpython-311.pyc +0 -0
  47. backend/data/preprocessing/ingest_documents.py +83 -0
  48. backend/data/preprocessing/questions_agent.py +49 -0
  49. backend/data/preprocessing/vectorstore/.DS_Store +0 -0
  50. backend/data/preprocessing/vectorstore/__pycache__/get.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,38 @@
+ FROM python:3.11
+
+ RUN apt-get update && apt-get install -y \
+     curl \
+     && curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - \
+     && apt-get install -y nodejs \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN apt-get update && apt-get install -y npm
+
+ # Verify installation
+ RUN node --version
+ RUN npm --version
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ WORKDIR $HOME/app
+ COPY --chown=user . $HOME/app
+ COPY ./backend/requirements.txt $HOME/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY . .
+
+ USER root
+ WORKDIR $HOME/app/frontend
+ RUN npm install
+ RUN npm audit fix
+ RUN npm run build
+ RUN chown -R user:user $HOME/app/frontend
+ RUN chown -R user:user $HOME/app/backend
+ USER user
+
+ # Change back to app directory
+ WORKDIR $HOME/app
+ EXPOSE 5173
+ EXPOSE 8080
+ CMD ["/bin/bash", "-c", "./run.sh"]
README-orig.md ADDED
@@ -0,0 +1,16 @@
+ ---
+ title: Chat Patient Intake
+ emoji: 📚
+ colorFrom: green
+ colorTo: indigo
+ sdk: docker
+ pinned: false
+ license: mit
+ app_port: 5173
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ ```
+
+ ```
README.md CHANGED
@@ -1,11 +1,12 @@
  ---
- title: Aie4 Final
- emoji: 💻
- colorFrom: red
- colorTo: yellow
+ title: Chat Patient Intake
+ emoji: 📚
+ colorFrom: green
+ colorTo: indigo
  sdk: docker
  pinned: false
- short_description: Final Project
+ license: mit
+ app_port: 5173
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
backend/.DS_Store ADDED
Binary file (6.15 kB).
 
backend/__init__.py ADDED
File without changes
backend/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (234 Bytes).
 
backend/app/__init__.py ADDED
File without changes
backend/app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (193 Bytes).

backend/app/__pycache__/auth.cpython-311.pyc ADDED
Binary file (6.3 kB).

backend/app/__pycache__/graph.cpython-311.pyc ADDED
Binary file (5.5 kB).

backend/app/__pycache__/main.cpython-311.pyc ADDED
Binary file (1.08 kB).

backend/app/__pycache__/prompt.cpython-311.pyc ADDED
Binary file (854 Bytes).

backend/app/__pycache__/webrtc.cpython-311.pyc ADDED
Binary file (5.77 kB).

backend/app/agents/__initi__.py ADDED
File without changes
backend/app/agents/__pycache__/medical.cpython-311.pyc ADDED
Binary file (3.28 kB).

backend/app/agents/__pycache__/prompt.cpython-311.pyc ADDED
Binary file (3.38 kB).

backend/app/agents/__pycache__/rag.cpython-311.pyc ADDED
Binary file (4.67 kB).

backend/app/agents/__pycache__/supervisor.cpython-311.pyc ADDED
Binary file (1.6 kB).

backend/app/agents/__pycache__/validation.cpython-311.pyc ADDED
Binary file (1.12 kB).

backend/app/agents/__pycache__/verification.cpython-311.pyc ADDED
Binary file (5.66 kB).
 
backend/app/agents/medical.py ADDED
@@ -0,0 +1,55 @@
+ import os
+ from dotenv import load_dotenv
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_openai import ChatOpenAI
+ from langchain_core.tools import tool
+ from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
+ from langgraph.graph import END
+ from .prompt import SYSTEM_PROMPT, CONTEXT_PROMPT, QUESTION_PROMPT
+ load_dotenv()
+ MODEL = os.getenv("MODEL")
+
+ @tool
+ def user_query(query: str):
+     """
+     Call this tool to retrieve the context of the conversation for the user's query, which should be an unambiguous and concise query with enough context from the message history.
+     """
+     return query
+
+ @tool
+ def completed(**kwargs):
+     """
+     Call this tool when all medical questions have been completed.
+     """
+     return True
+
+ tools_by_name = {
+     "user_query": user_query,
+     "completed": completed
+ }
+
+ def medical_route(state):
+     if not state["messages"]:
+         return END
+     last_message = state["messages"][-1]
+     if last_message.tool_calls:
+         return "rag_tool_node"
+     else:
+         return END
+
+ class MedicalQuestionAgent:
+     def __init__(self, questions=None):  # avoid a mutable default argument
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", SYSTEM_PROMPT),
+             ("system", QUESTION_PROMPT),
+             ("system", CONTEXT_PROMPT),
+             MessagesPlaceholder(variable_name="messages")
+         ])
+         self.llm = ChatOpenAI(model=MODEL, temperature=0, streaming=True)
+         self.chain = self.prompt | self.llm.bind_tools([user_query, completed])
+         self.questions = questions or []
+
+     def __call__(self, state):
+         results = self.chain.invoke({**state, "questions": self.questions})
+         return {**state, "messages": [results]}
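As a sketch of the calling convention (the import path and sample questions are assumptions; OPENAI_API_KEY and MODEL must be set), one turn of the agent outside the graph looks like:

```python
# Minimal sketch: drive one turn of the medical agent outside the graph.
from langchain_core.messages import HumanMessage
from app.agents.medical import MedicalQuestionAgent, medical_route

agent = MedicalQuestionAgent(questions=["Do you smoke?", "Any known allergies?"])
state = {"messages": [HumanMessage(content="Hi, I'm ready.")], "next": 1}
state = agent(state)          # appends the model's reply (or a tool call)
print(medical_route(state))   # "rag_tool_node" if a tool was called, else END
```

The questions list is injected into QUESTION_PROMPT's {questions} placeholder on every invocation.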
backend/app/agents/prompt.py ADDED
@@ -0,0 +1,38 @@
+ SYSTEM_PROMPT = """
+ You are a helpful, empathetic and polite medical receptionist named Jane, assisting with basic administrative tasks. Introduce yourself as Jane in your first message and announce your purpose, which is to obtain information from the patient for their upcoming appointment. Be concise and stay on track in obtaining information from the patient. You are not a medical professional, so if a patient asks a medical question, you must politely remind them that you cannot provide medical advice or diagnoses. You should always encourage the patient to speak directly with their healthcare provider for medical concerns. Always ask the patient one question at a time and wait for the patient's response before asking the next question. Assume the patient has a high level of literacy and is able to understand and respond to your questions.
+ """
+
+
+ VERIFICATION_PROMPT = """
+ You are an expert in verification and validation. Given a list of fields, you are to retrieve information from the user for those fields. You also have the correct answers to these fields, allowing you to check whether the user's responses are correct. Do not repeat the same question or ask a question that has already been answered by the user.
+
+ For fields that involve dates:
+
+ 1. Parsing the date: Accept the user's date input in any common format (e.g., "Jan 1 1990", "1st January 1990", "1990/01/01").
+ 2. Converting to ISO 8601 date format: Convert the parsed date into the yyyy-MM-dd format.
+ 3. Comparison: Compare the converted date with the correct answer you have.
+
+ If the user's date input, after conversion, matches the correct answer, consider it correct. If not, proceed as follows:
+ - Invalid response handling: Call the 'invalid' tool without mentioning any correct values or hints.
+
+ For non-date fields:
+
+ - If the user's response is incorrect, call the 'invalid' tool with the value for the corresponding field you are verifying.
+
+ Additional guidelines:
+
+ - Never reveal any of the correct answers to the user unless they have already provided that exact value.
+ - If you have successfully verified all fields, call the 'completed' tool.
+ - Never end with a farewell or goodbye.
+ """
+
+ QUESTION_PROMPT = """\
+ You are an expert in asking questions to the patient. Ask one question at a time and wait for the patient's response before asking the next question. Do not repeat the same question unless the user has not answered it correctly or fully. Once all questions are answered, call the 'completed' tool.
+
+ QUESTIONS:
+ {questions}
+
+ """
+
+ CONTEXT_PROMPT = """\
+ In addition to asking the patient questions, you are a friendly assistant that helps users answer their questions or responds to their comments. Only answer questions or respond to comments using the context and message history. Do not make up information. For any user query that you cannot answer, call the 'user_query' function to retrieve the context for that query. If the user did have questions or a query, do not jump back to the intake questions until you confirm that their questions have been answered.
+ """
backend/app/agents/rag.py ADDED
@@ -0,0 +1,73 @@
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
+ from langchain_openai import ChatOpenAI
+ from operator import itemgetter
+ from langchain.schema.runnable import RunnablePassthrough
+ from .medical import tools_by_name
+
+ SYSTEM_PROMPT = """\
+ You are an expert in answering questions succinctly and correctly, using only the context and chat history. Answer the user's last question in the chat history, and look at the message history for further context if needed. If you are not able to answer the user's last question based on the context, reply with "I don't know". Never make up an answer.
+ """
+
+ CONTEXT_PROMPT = """\
+ Context:
+ {context}
+
+ Chat History:
+ {chat_history}
+ """
+
+ def map_messages(messages):
+     text = ""
+     for message in messages:
+         if isinstance(message, HumanMessage):
+             text += f"Human: {message.content}\n"
+         elif isinstance(message, AIMessage):
+             text += f"AI: {message.content}\n"
+     return text
+
+ class RAGTool:
+     def __init__(self, llm: ChatOpenAI, retriever=None):
+         self.retriever = retriever
+         self.llm = llm
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", SYSTEM_PROMPT),
+             ("user", CONTEXT_PROMPT)
+         ])
+
+         self.llm_chain = (
+             {"context": itemgetter("question") | retriever, "chat_history": itemgetter("question")}
+             | RunnablePassthrough.assign(context=itemgetter("context"))
+             | {"response": self.prompt | self.llm, "context": itemgetter("context")}
+         )
+
+     def __call__(self, state):
+         last_message = state["messages"][-1]
+         messages = []
+         for tool_call in last_message.tool_calls:
+             print('TOOL CALL**********************', tools_by_name[tool_call["name"]], tools_by_name[tool_call["name"]].invoke({**tool_call["args"]}))
+             if tool_call["name"] == "user_query":
+                 query = tools_by_name[tool_call["name"]].invoke({**tool_call["args"]})
+                 response = self.retriever.invoke(query)
+                 print('RESPONSE**********************', response)
+                 messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"],
+                                             content=f"Context:\n{response}"))
+             elif tool_call["name"] == "completed":
+                 state["next"] += 1
+                 print("COMPLETED!!!!!", state["next"])
+                 messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content="Medical intake complete. Tell the user or patient that we are done with the intake process. Give them a professional and friendly farewell and mention that we look forward to seeing them at the appointment."))
+             else:
+                 messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=""))
+
+         return {**state, "messages": messages}
+
+ # Unused draft of a direct RAG invocation via llm_chain:
+ '''
+ user_input = state["question"]
+ result = self.llm_chain.invoke(
+     {"chat_history": map_messages(state["messages"]) + f'Human: {user_input}'})
+ ai_message = result["response"]
+ context = result["context"]
+ return {**state, "messages": [ai_message], "context": context}
+ '''
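RAGTool.__call__ only touches the retriever and the tools, so the node can be exercised with stub runnables (stubs are needed because __init__ also composes them into llm_chain); a sketch with our own stand-ins, no API calls:

```python
# Sketch: exercise RAGTool's tool-call handling with stub runnables.
from langchain_core.messages import AIMessage
from langchain_core.runnables import RunnableLambda
from app.agents.rag import RAGTool

stub_retriever = RunnableLambda(lambda q: [f"(document snippet about: {q})"])
stub_llm = RunnableLambda(lambda _: AIMessage(content="stub"))

node = RAGTool(llm=stub_llm, retriever=stub_retriever)
state = {
    "messages": [AIMessage(content="", tool_calls=[
        {"name": "user_query", "args": {"query": "parking at the clinic"}, "id": "call_1"}
    ])],
    "next": 1,
}
out = node(state)
print(out["messages"][0].content)  # Context:\n['(document snippet about: parking at the clinic)']
```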
backend/app/agents/state/__init__.py ADDED
File without changes
backend/app/agents/state/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (251 Bytes). View file
 
backend/app/agents/state/__pycache__/state.cpython-311.pyc ADDED
Binary file (1.88 kB). View file
 
backend/app/agents/state/state.py ADDED
@@ -0,0 +1,28 @@
+ from typing_extensions import TypedDict
+ from typing import Annotated
+ from langgraph.graph import add_messages
+
+ class IdentificationState(TypedDict):
+     messages: Annotated[list, add_messages]
+     fields: str
+     values: str
+     counter: int
+
+ class IntakeRAGState(TypedDict):
+     messages: Annotated[list, add_messages]
+     question: str  # current user input; it may or may not be a question
+     context: str
+
+ class SupervisorState(TypedDict):
+     #messages: Annotated[list, add_messages]
+     next: int  # index of the next stage in the workflow
+
+ class GraphState(TypedDict):
+     messages: Annotated[list, add_messages]
+     fields: str
+     values: str
+     counter: int
+     question: str  # current user input; it may or may not be a question
+     context: str
+     completed: str
+     next: int
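For concreteness, the minimal GraphState payload that the runners in graph.py pass in looks like the following; the remaining keys are back-filled by SupervisorAgent on the first pass through the graph:

```python
# Illustrative only: the sample field/value data is the same one graph.py uses.
initial_state = {
    "messages": [("user", "Hi")],
    "fields": "full name, birthdate",
    "values": "John Doe, 1990-01-01",
}
```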
backend/app/agents/supervisor.py ADDED
@@ -0,0 +1,22 @@
+ from langgraph.graph import END
+ class SupervisorAgent:
+     def __init__(self):
+         self.order = ['verification_agent', 'medical_agent']
+
+     def __call__(self, state):
+         if not state.get('completed'):
+             state["completed"] = ""
+         if not state.get('next'):
+             state["next"] = 0
+         if not state.get('context'):
+             state["context"] = ""
+
+         return state
+
+     def route(self, state):
+         # Route on the per-thread state, not instance attributes: the tool
+         # nodes advance state["next"] when a stage completes.
+         if state["next"] >= len(self.order):
+             return END
+         print("SUPERVISOR route", state["next"])
+         return self.order[state["next"]]
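The routing contract, stage by stage (the import path is an assumption):

```python
from langgraph.graph import END
from app.agents.supervisor import SupervisorAgent

sup = SupervisorAgent()
print(sup.route({"next": 0}))          # 'verification_agent'
print(sup.route({"next": 1}))          # 'medical_agent'
print(sup.route({"next": 2}) == END)   # True once both stages have completed
```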
backend/app/agents/verification.py ADDED
@@ -0,0 +1,86 @@
+ import os
+ from dotenv import load_dotenv
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_openai import ChatOpenAI
+ from langchain_core.tools import tool
+ from langchain_core.messages import HumanMessage, ToolMessage
+ from langgraph.graph import END
+ from .prompt import VERIFICATION_PROMPT, SYSTEM_PROMPT
+ load_dotenv()
+ MODEL = os.getenv("MODEL")
+
+ @tool
+ def invalid(field: str, value: str, counter=1):
+     """
+     Call this tool if the user's response is not valid for one of the fields you are verifying.
+     """
+     if counter >= 2:
+         return f"The user's response for {field} with value {value} is not valid. Politely end the conversation and ask them to call the support number."
+     else:
+         return f"The user's response for {field} with value {value} is not valid. Indicate to the user that it does not match our records. Please ask the user one more time."
+
+ @tool
+ def completed(**kwargs):
+     """
+     Call this tool when verification is complete and successful.
+     """
+     return "The verification is complete. Moving on to medical questions."
+
+ tools_by_name = {
+     "invalid": invalid,
+     "completed": completed
+ }
+
+ def verification_route(state):
+     if not state["messages"]:
+         return END
+     last_message = state["messages"][-1]
+     if last_message.tool_calls:
+         return "verification_tool_node"
+     else:
+         return END
+
+ class VerificationAgent:
+     def __init__(self):
+         self.prompt = ChatPromptTemplate.from_messages([
+             ("system", SYSTEM_PROMPT),
+             ("system", VERIFICATION_PROMPT),
+             ("system", "Fields:{fields}"),
+             ("system", "Values:{values}"),
+             MessagesPlaceholder(variable_name="messages")
+         ])
+         self.llm = ChatOpenAI(model=MODEL, temperature=0, streaming=True)
+         self.chain = self.prompt | self.llm.bind_tools([invalid, completed])
+
+     def __call__(self, state):
+         result = self.chain.invoke(state)
+         if not state.get("counter") or not result.tool_calls:
+             state["counter"] = 0
+         return {**state, "messages": [result]}
+
+ def process_tool(state):
+     last_message = state["messages"][-1]
+     state["counter"] = state.get("counter", 0) + 1
+     #print('LAST MESSAGE**********************', last_message)
+     messages = []
+     for tool_call in last_message.tool_calls:
+
+         if tool_call["name"] == "invalid":
+             #print('TOOL CALL**********************', tools_by_name[tool_call["name"]].invoke({**tool_call["args"], "counter": state["counter"]}))
+             message = tools_by_name[tool_call["name"]].invoke({**tool_call["args"], "counter": state["counter"]})
+             if state["counter"] >= 2:
+                 # Second strike: reset the counter and surface the tool's "end the conversation" message.
+                 state["counter"] = 0
+                 messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=message))
+             else:
+                 state["counter"] += 1
+                 messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=f"The user's response for {tool_call['args']['field']} is not valid. Indicate to the user that it does not match our records. Please ask the user one more time."))
+         elif tool_call["name"] == "completed":
+             state["next"] += 1
+             print("COMPLETED!!!!!", state["next"])
+             messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content="Verification complete. Prompt the user that we are moving on to medical questions. Do not end with a farewell. Mention that during the next stage the patient can ask any questions they have."))
+         else:
+             messages.append(ToolMessage(name=tool_call["name"], tool_call_id=tool_call["id"], content=""))
+     return {**state, "messages": messages}
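The two-strike flow in process_tool can be traced without any model call; a standalone sketch (the import path is an assumption about the package root):

```python
from langchain_core.messages import AIMessage
from app.agents.verification import process_tool

# First strike: the resulting tool message asks the user to try once more.
state = {
    "messages": [AIMessage(content="", tool_calls=[
        {"name": "invalid", "args": {"field": "birthdate", "value": "1991-01-01"}, "id": "call_1"}
    ])],
    "counter": 0,
    "next": 0,
}
out = process_tool(state)
print(out["messages"][0].content)  # "...does not match our records..."
```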
backend/app/auth.py ADDED
@@ -0,0 +1,106 @@
+ from fastapi import APIRouter, Depends, HTTPException, status
+ from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
+ from jose import JWTError, jwt
+ from passlib.context import CryptContext
+ from pydantic import BaseModel
+ from datetime import datetime, timedelta, timezone
+ import sqlite3
+ import os
+
+ SECRET_KEY = "your-secret-key"
+ ALGORITHM = "HS256"
+ ACCESS_TOKEN_EXPIRE_MINUTES = 300
+
+ pwd_context = CryptContext(schemes=["django_pbkdf2_sha256"], deprecated="auto")
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
+
+ auth_router = APIRouter()
+
+ class User(BaseModel):
+     username: str
+
+ class Token(BaseModel):
+     access_token: str
+     token_type: str
+
+ # SQLite setup
+ DB_NAME = "users.db"
+
+ def init_db():
+     conn = sqlite3.connect(DB_NAME)
+     cur = conn.cursor()
+     cur.execute('''CREATE TABLE IF NOT EXISTS users
+                    (username TEXT PRIMARY KEY, hashed_password TEXT)''')
+
+     # Add sample users if they don't exist
+     cur.execute("INSERT OR IGNORE INTO users VALUES (?, ?)",
+                 ("admin", pwd_context.hash("admin")))
+
+     conn.commit()
+     conn.close()
+
+ # Initialize the database
+ init_db()
+
+ def verify_password(plain_password, hashed_password):
+     return pwd_context.verify(plain_password, hashed_password)
+
+ def get_user(username: str):
+     conn = sqlite3.connect(DB_NAME)
+     cur = conn.cursor()
+     cur.execute("SELECT * FROM users WHERE username=?", (username,))
+     user = cur.fetchone()
+     conn.close()
+     if user:
+         return User(username=user[0])
+     return None
+
+ def authenticate_user(username: str, password: str):
+     conn = sqlite3.connect(DB_NAME)
+     cur = conn.cursor()
+     cur.execute("SELECT * FROM users WHERE username=?", (username,))
+     user = cur.fetchone()
+     conn.close()
+     if not user:
+         return False
+     if not verify_password(password, user[1]):
+         return False
+     return User(username=user[0])
+
+ def create_access_token(data: dict):
+     to_encode = data.copy()
+     expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+     to_encode.update({"exp": expire})
+     encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+     return encoded_jwt
+
+ @auth_router.post("/token", response_model=Token)
+ async def login(form_data: OAuth2PasswordRequestForm = Depends()):
+     user = authenticate_user(form_data.username, form_data.password)
+     if not user:
+         raise HTTPException(
+             status_code=status.HTTP_401_UNAUTHORIZED,
+             detail="Incorrect username or password",
+             headers={"WWW-Authenticate": "Bearer"},
+         )
+     access_token = create_access_token(data={"sub": user.username})
+     return {"access_token": access_token, "token_type": "bearer"}
+
+ async def get_current_user(token: str = Depends(oauth2_scheme)):
+     credentials_exception = HTTPException(
+         status_code=status.HTTP_401_UNAUTHORIZED,
+         detail="Could not validate credentials",
+         headers={"WWW-Authenticate": "Bearer"},
+     )
+     try:
+         payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
+         username: str = payload.get("sub")
+         if username is None:
+             raise credentials_exception
+         user = get_user(username)
+         if user is None:
+             raise credentials_exception
+         return user
+     except JWTError:
+         raise credentials_exception
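For reference, a minimal client sketch (endpoint names are from this diff; the base URL is an assumption): obtain a token with the admin/admin user seeded by init_db(), then call an authenticated route.

```python
import requests

BASE = "http://localhost:8080"  # assumes main.py is serving on port 8080

# /token expects OAuth2 form fields, not JSON
resp = requests.post(f"{BASE}/token", data={"username": "admin", "password": "admin"})
token = resp.json()["access_token"]

headers = {"Authorization": f"Bearer {token}"}
resp = requests.post(f"{BASE}/webrtc/clear_memory", json={}, headers=headers)
print(resp.json())  # {'status': 'success'}
```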
backend/app/chatbot.py ADDED
@@ -0,0 +1,5 @@
+ import asyncio
+ from .graph import run, graph  # run as a module, e.g. python -m app.chatbot
+
+ if __name__ == "__main__":
+     asyncio.run(run(graph.compile()))  # run() is a coroutine
backend/app/graph.py ADDED
@@ -0,0 +1,85 @@
+ import os
+ from langgraph.graph import StateGraph, END
+ from langgraph.checkpoint.memory import MemorySaver
+ # from .prompt import SYSTEM_PROMPT
+ import asyncio
+ from .agents.supervisor import SupervisorAgent
+ from .agents.verification import VerificationAgent, process_tool, verification_route
+ from .agents.medical import MedicalQuestionAgent, medical_route
+ from .agents.rag import RAGTool
+ from .agents.state.state import GraphState
+ from data.preprocessing.vectorstore.get import retriever
+ from langchain_openai import ChatOpenAI
+ from .upload_pdf.ingest_documents import PDFProcessor
+
+
+ pdf_processor = PDFProcessor(file_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data', 'combined_forms', 'temp', 'ACTC-Patient-Packet.pdf')))
+ questions = pdf_processor.extract_questions()
+ questions = [q.content for q in questions]
+ print('QUESTIONS**********************', questions)
+ memory = MemorySaver()
+
+ graph = StateGraph(GraphState)
+
+ supervisor = SupervisorAgent()
+ graph.add_node("supervisor_agent", supervisor)
+ graph.add_node("verification_agent", VerificationAgent())
+ graph.add_node("verification_tool_node", process_tool)
+ graph.add_node("medical_agent", MedicalQuestionAgent(questions=questions))
+ graph.add_node("rag_tool_node", RAGTool(retriever=retriever,
+                                         llm=ChatOpenAI(model=os.environ["MODEL"])))
+
+ graph.set_entry_point("supervisor_agent")
+
+ graph.add_edge("verification_tool_node", "verification_agent")
+ graph.add_edge("rag_tool_node", "medical_agent")
+ graph.add_conditional_edges(
+     "supervisor_agent",
+     supervisor.route
+ )
+ graph.add_conditional_edges(
+     "verification_agent",
+     verification_route,
+     {"__end__": END, "verification_tool_node": "verification_tool_node"}
+ )
+ graph.add_conditional_edges(
+     "medical_agent",
+     medical_route,
+     {"__end__": END, "rag_tool_node": "rag_tool_node"}
+ )
+
+
+ async def run_verification(app, fields="", values=""):
+     config = {"configurable": {"thread_id": 1}}
+
+     _input = input('User: ')
+     while _input != 'quit':
+         async for event in app.astream_events({"messages": [('user', _input)], "fields": "full name, birthdate", "values": "John Doe, 1990-01-01"}, config=config, version="v2"):
+             if event['event'] == "on_chat_model_stream":
+                 data = event["data"]
+                 if data["chunk"].content:
+                     print(data["chunk"].content.replace(
+                         "\n", ""), end="", flush=True)
+
+         _input = input('\nUser: ')
+
+
+ async def run(app):
+     from langchain_core.messages import AIMessageChunk, HumanMessage
+     config = {"configurable": {"thread_id": 1}}
+     _user_input = input("User: ")
+
+     while _user_input != "quit":
+         out = ""
+         astream = app.astream({"messages": [HumanMessage(content=_user_input)], "fields": "full name, birthdate", "values": "John Doe, 1990-01-01"}, config=config, stream_mode="messages")
+         async for msg, metadata in astream:
+             if isinstance(msg, AIMessageChunk):
+                 out += msg.content
+         print('Assistant: ', out)
+         _user_input = input("User: ")
+
+
+ if __name__ == "__main__":
+     app = graph.compile(checkpointer=memory)
+     asyncio.run(run(app))
backend/app/main.py ADDED
@@ -0,0 +1,22 @@
+ from dotenv import load_dotenv
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from .auth import auth_router
+ from .webrtc import webrtc_router
+ load_dotenv()
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ app.include_router(auth_router)
+ app.include_router(webrtc_router)
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8080)
backend/app/upload_pdf/__pycache__/ingest_documents.cpython-311.pyc ADDED
Binary file (5.01 kB).

backend/app/upload_pdf/__pycache__/questions_agent.cpython-311.pyc ADDED
Binary file (3.15 kB).
 
backend/app/upload_pdf/ingest_documents.py ADDED
@@ -0,0 +1,83 @@
+ from langchain_openai import OpenAIEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_community.vectorstores import Qdrant
+ from qdrant_client import QdrantClient
+ from qdrant_client.http.models import Distance, VectorParams
+ from dotenv import load_dotenv
+ from .questions_agent import workflow
+ load_dotenv()
+
+ CHUNK_SIZE = 500
+ CHUNK_OVERLAP = 200
+
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size=CHUNK_SIZE,
+     chunk_overlap=CHUNK_OVERLAP
+ )
+
+ # Step 1: Parse the PDF and extract questions
+ class PDFProcessor:
+     def __init__(self, file_path):
+         self.file_path = file_path
+         self.text = ""
+         self.docs = PyMuPDFLoader(self.file_path).load()
+
+     def extract_text(self):
+         for doc in self.docs:
+             self.text += doc.page_content
+         return self.text
+
+     def extract_questions(self):
+         questions = []
+         chunks = text_splitter.split_text(self.extract_text())
+
+         config = {"configurable": {"thread_id": 1}}
+         #state = workflow.get_state(config=config).values
+         question_sets = workflow.batch(config=config, inputs=[{"context": chunk, "previous_questions": []} for chunk in chunks])
+
+         for item in question_sets:
+             questions.extend(item.get("previous_questions", []))
+
+         return questions
+
+
+ # Step 2: Split questions and prepare for the vector database
+ class QuestionIngestor:
+     def __init__(self, questions):
+         self.questions = questions
+
+     def split_questions(self):
+         # Use a text splitter to handle long questions
+         splitter = RecursiveCharacterTextSplitter(
+             chunk_size=500,
+             chunk_overlap=20
+         )
+         docs = splitter.create_documents(self.questions)
+         return docs
+
+ # Step 3: Set up the Qdrant vector store and index the data
+ class QdrantSetup:
+     def __init__(self, questions):
+         self.questions = questions
+         self.qdrant_client = QdrantClient("localhost", port=6333)
+         self.embedding = OpenAIEmbeddings()
+
+     def setup_qdrant(self):
+         # Create a Qdrant collection for questions
+         self.qdrant_client.recreate_collection(
+             collection_name="questions",
+             vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+         )
+
+     def index_questions(self):
+         # Index questions into Qdrant
+         qdrant_vectorstore = Qdrant(
+             client=self.qdrant_client,
+             collection_name="questions",
+             embeddings=self.embedding
+         )
+         qdrant_vectorstore.add_documents(self.questions)
+ # TODO: use parent-document embedding for categorization
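How the three steps chain together, as a sketch (the relative path and the running Qdrant instance on localhost:6333 are assumptions; OPENAI_API_KEY must be set):

```python
from app.upload_pdf.ingest_documents import PDFProcessor, QuestionIngestor, QdrantSetup

processor = PDFProcessor(file_path="data/combined_forms/temp/ACTC-Patient-Packet.pdf")
# extract_questions returns message objects (via add_messages), hence .content
questions = [q.content for q in processor.extract_questions()]

docs = QuestionIngestor(questions).split_questions()  # Document objects

setup = QdrantSetup(docs)  # index_questions expects Documents
setup.setup_qdrant()
setup.index_questions()
```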
backend/app/upload_pdf/questions_agent.py ADDED
@@ -0,0 +1,49 @@
+ from langchain_openai import ChatOpenAI
+ from langchain_core.messages import HumanMessage
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langgraph.graph import StateGraph
+ from typing_extensions import Annotated, TypedDict
+ from langgraph.graph import add_messages, END
+ from langgraph.checkpoint.memory import MemorySaver
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ llm = ChatOpenAI(model="gpt-4o", temperature=0)
+ memory = MemorySaver()
+
+ class State(TypedDict):
+     previous_questions: Annotated[list, add_messages]
+     context: str
+
+ prompt = ChatPromptTemplate.from_template(
+     """
+     You are an expert at ingesting documents and creating questions for a medical questionnaire to be answered by patients with a high school level education. Given the following context, which should contain medical questions, extract from only this context all medical questions, separated by '|', that would be appropriate for a patient to answer. Indicate if a question is multiple choice and include the possible choices. If there are no medical questions in the context, output 'None'.
+
+     Context:
+     {context}
+     """
+ )
+
+ def create_questions(state):
+     results = (prompt | llm | StrOutputParser()).invoke(state)
+     questions = [q.strip() for q in results.split("|")]
+
+     questions = [q for q in questions if q and q != 'None']
+     return {"previous_questions": questions, "context": state.get("context", "") or ''}
+
+ graph = StateGraph(State)
+
+ graph.add_node("create_questions", create_questions)
+ graph.set_entry_point("create_questions")
+ graph.add_edge("create_questions", END)
+ workflow = graph.compile(checkpointer=memory)
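A quick way to exercise the workflow on a single chunk (the sample context is ours; requires OPENAI_API_KEY):

```python
from app.upload_pdf.questions_agent import workflow

result = workflow.invoke(
    {"context": "Do you smoke? [ ] Yes [ ] No  Have you had surgery before?",
     "previous_questions": []},
    config={"configurable": {"thread_id": 1}},
)
for q in result["previous_questions"]:
    print(q.content)  # add_messages stores each extracted question as a message
```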
backend/app/webrtc.py ADDED
@@ -0,0 +1,83 @@
+ import os
+ from dotenv import load_dotenv
+ from fastapi import APIRouter, Depends, WebSocket
+ from fastapi.responses import JSONResponse
+ from .auth import get_current_user
+ from aiortc import RTCPeerConnection, RTCSessionDescription
+ from langchain_openai import ChatOpenAI
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain.memory import ConversationBufferMemory
+ from langchain_core.messages import HumanMessage, RemoveMessage, AIMessageChunk
+ from langgraph.checkpoint.memory import MemorySaver
+ from .graph import graph
+ from typing import Dict
+ load_dotenv()
+
+ memory = MemorySaver()
+ app = graph.compile(checkpointer=memory)
+
+ webrtc_router = APIRouter()
+
+ # Initialize OpenAI
+ llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
+
+ prompt = ChatPromptTemplate.from_messages([
+     ("system", "You are an expert in asking questions. Your goal is to ask for the full name, age, and address of a person."),
+     MessagesPlaceholder(variable_name="messages")
+ ])
+
+ chain = prompt | llm  # remove this
+ user_memories: Dict[str, ConversationBufferMemory] = {}  # remove this
+
+
+ @webrtc_router.post("/webrtc/offer")
+ async def webrtc_offer(offer: dict, current_user: dict = Depends(get_current_user)):
+     pc = RTCPeerConnection()
+     offer_obj = RTCSessionDescription(sdp=offer["sdp"], type=offer["type"])
+
+     await pc.setRemoteDescription(offer_obj)
+     answer = await pc.createAnswer()
+     await pc.setLocalDescription(answer)
+
+     # Create a new memory for the user if it doesn't exist
+     if current_user.username not in user_memories:
+         user_memories[current_user.username] = ConversationBufferMemory(
+             return_messages=True)
+
+     @pc.on("datachannel")
+     def on_datachannel(channel):
+         @channel.on("message")
+         async def on_message(message):
+             # Process the message using LangChain
+             memory = user_memories[current_user.username]
+             user_message = HumanMessage(content=message)
+
+             memory.chat_memory.add_user_message(user_message)  # remove this
+
+             config = {"configurable": {"thread_id": current_user.username}}
+
+             astream = app.astream({"messages": [user_message], "fields": "full name, birthdate", "values": "John Doe, 1990-01-01"}, config=config, stream_mode="messages")
+             async for msg, metadata in astream:
+                 if isinstance(msg, AIMessageChunk):
+                     channel.send(msg.content)
+
+     return JSONResponse(content={
+         "sdp": pc.localDescription.sdp,
+         "type": pc.localDescription.type
+     })
+
+
+ @webrtc_router.post("/webrtc/ice-candidate")
+ async def webrtc_ice_candidate(candidate: dict, current_user: dict = Depends(get_current_user)):
+     # In a real-world scenario, you'd store and forward this candidate to the other peer
+     return JSONResponse(content={"status": "success"})
+
+
+ @webrtc_router.post("/webrtc/clear_memory")
+ async def webrtc_clear_memory(obj: dict, current_user: dict = Depends(get_current_user)):
+     config = {"configurable": {"thread_id": current_user.username}}
+     state = app.get_state(config=config)
+     messages = state.values.get("messages", [])
+     for message in messages:
+         app.update_state(config, {"messages": RemoveMessage(id=message.id)})
+     return JSONResponse(content={"status": "success"})
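A Python-side client sketch using aiortc (ours, not part of the diff): post an SDP offer to /webrtc/offer with a bearer token, then exchange messages over the data channel. Signaling details such as ICE trickle are elided, so treat it as illustrative rather than a working client.

```python
import asyncio
import requests
from aiortc import RTCPeerConnection, RTCSessionDescription

async def main(token: str):
    pc = RTCPeerConnection()
    channel = pc.createDataChannel("chat")

    @channel.on("open")
    def on_open():
        channel.send("Hi, I'm ready to start my intake.")

    @channel.on("message")
    def on_message(message):
        print("Assistant chunk:", message)

    offer = await pc.createOffer()
    await pc.setLocalDescription(offer)
    resp = requests.post(
        "http://localhost:8080/webrtc/offer",  # assumes main.py on port 8080
        json={"sdp": pc.localDescription.sdp, "type": pc.localDescription.type},
        headers={"Authorization": f"Bearer {token}"},
    )
    answer = resp.json()
    await pc.setRemoteDescription(RTCSessionDescription(sdp=answer["sdp"], type=answer["type"]))
    await asyncio.sleep(30)  # keep the connection alive while chunks stream back

asyncio.run(main("<token from /token>"))
```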
backend/data/.DS_Store ADDED
Binary file (6.15 kB).

backend/data/__init__.py ADDED
File without changes
backend/data/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (239 Bytes).
 
backend/data/combined_forms/temp/ACTC-Patient-Packet.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a260c610ba55458698fb28200aa6b74273c462d94a3f253cd545b9036f97406f
+ size 607119
backend/data/preprocessing/.DS_Store ADDED
Binary file (6.15 kB).

backend/data/preprocessing/__pycache__/ingest_documents.cpython-311.pyc ADDED
Binary file (6.67 kB).

backend/data/preprocessing/__pycache__/questions_agent.cpython-311.pyc ADDED
Binary file (3.15 kB).
 
backend/data/preprocessing/ingest_documents.py ADDED
@@ -0,0 +1,83 @@
+ from langchain_openai import OpenAIEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_community.vectorstores import Qdrant
+ from qdrant_client import QdrantClient
+ from qdrant_client.http.models import Distance, VectorParams
+ from dotenv import load_dotenv
+ from .questions_agent import workflow
+ load_dotenv()
+
+ CHUNK_SIZE = 500
+ CHUNK_OVERLAP = 200
+
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size=CHUNK_SIZE,
+     chunk_overlap=CHUNK_OVERLAP
+ )
+
+ # Step 1: Parse the PDF and extract questions
+ class PDFProcessor:
+     def __init__(self, file_path):
+         self.file_path = file_path
+         self.text = ""
+         self.docs = PyMuPDFLoader(self.file_path).load()
+
+     def extract_text(self):
+         for doc in self.docs:
+             self.text += doc.page_content
+         return self.text
+
+     def extract_questions(self):
+         questions = []
+         chunks = text_splitter.split_text(self.extract_text())
+
+         config = {"configurable": {"thread_id": 1}}
+         #state = workflow.get_state(config=config).values
+         question_sets = workflow.batch(config=config, inputs=[{"context": chunk, "previous_questions": []} for chunk in chunks])
+
+         for item in question_sets:
+             questions.extend(item.get("previous_questions", []))
+
+         return questions
+
+
+ # Step 2: Split questions and prepare for the vector database
+ class QuestionIngestor:
+     def __init__(self, questions):
+         self.questions = questions
+
+     def split_questions(self):
+         # Use a text splitter to handle long questions
+         splitter = RecursiveCharacterTextSplitter(
+             chunk_size=500,
+             chunk_overlap=20
+         )
+         docs = splitter.create_documents(self.questions)
+         return docs
+
+ # Step 3: Set up the Qdrant vector store and index the data
+ class QdrantSetup:
+     def __init__(self, questions):
+         self.questions = questions
+         self.qdrant_client = QdrantClient("localhost", port=6333)
+         self.embedding = OpenAIEmbeddings()
+
+     def setup_qdrant(self):
+         # Create a Qdrant collection for questions
+         self.qdrant_client.recreate_collection(
+             collection_name="questions",
+             vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+         )
+
+     def index_questions(self):
+         # Index questions into Qdrant
+         qdrant_vectorstore = Qdrant(
+             client=self.qdrant_client,
+             collection_name="questions",
+             embeddings=self.embedding
+         )
+         qdrant_vectorstore.add_documents(self.questions)
+ # TODO: use parent-document embedding for categorization
backend/data/preprocessing/questions_agent.py ADDED
@@ -0,0 +1,49 @@
+ from langchain_openai import ChatOpenAI
+ from langchain_core.messages import HumanMessage
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langgraph.graph import StateGraph
+ from typing_extensions import Annotated, TypedDict
+ from langgraph.graph import add_messages, END
+ from langgraph.checkpoint.memory import MemorySaver
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ llm = ChatOpenAI(model="gpt-4o", temperature=0)
+ memory = MemorySaver()
+
+ class State(TypedDict):
+     previous_questions: Annotated[list, add_messages]
+     context: str
+
+ prompt = ChatPromptTemplate.from_template(
+     """
+     You are an expert at ingesting documents and creating questions for a medical questionnaire to be answered by patients with a high school level education. Given the following context, which should contain medical questions, extract from only this context all medical questions, separated by '|', that would be appropriate for a patient to answer. Indicate if a question is multiple choice and include the possible choices. If there are no medical questions in the context, output 'None'.
+
+     Context:
+     {context}
+     """
+ )
+
+ def create_questions(state):
+     results = (prompt | llm | StrOutputParser()).invoke(state)
+     questions = [q.strip() for q in results.split("|")]
+
+     questions = [q for q in questions if q and q != 'None']
+     return {"previous_questions": questions, "context": state.get("context", "") or ''}
+
+ graph = StateGraph(State)
+
+ graph.add_node("create_questions", create_questions)
+ graph.set_entry_point("create_questions")
+ graph.add_edge("create_questions", END)
+ workflow = graph.compile(checkpointer=memory)
backend/data/preprocessing/vectorstore/.DS_Store ADDED
Binary file (6.15 kB).

backend/data/preprocessing/vectorstore/__pycache__/get.cpython-311.pyc ADDED
Binary file (1.36 kB).
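Only the compiled get.cpython-311.pyc made it into this 50-file view, but graph.py imports `retriever` from data/preprocessing/vectorstore/get.py. A hypothetical sketch of what that module plausibly contains, consistent with the QdrantSetup code above (not the actual source):

```python
# Hypothetical reconstruction — the real get.py is outside this 50-file view.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient

client = QdrantClient("localhost", port=6333)
vectorstore = Qdrant(
    client=client,
    collection_name="questions",
    embeddings=OpenAIEmbeddings(model="text-embedding-3-small"),
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
```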