# PreCollege Chatbot — Streamlit RAG app (Hugging Face Space).
# ("Spaces: Sleeping" lines were Hugging Face UI scrape residue, not code.)
import os
import sys

# Chroma needs a modern sqlite3 (>= 3.35). Swap in pysqlite3 BEFORE any
# library that may import sqlite3/chromadb — previously this shim ran after
# the Chroma import, where it could take effect too late.
import pysqlite3
sys.modules['sqlite3'] = pysqlite3

# Force Hugging Face libraries into offline mode before they are imported,
# so no model download is attempted at load time.
os.environ["TRANSFORMERS_OFFLINE"] = "1"

import streamlit as st
import google.generativeai as genai
from dotenv import load_dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import Docx2txtLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from sentence_transformers import SentenceTransformer
# Retrieve the Google Gemini API key from the .env file / environment.
# SECURITY: the key must never be hard-coded in source — the previous
# revision embedded a live API key here; that key should be revoked.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Gemini API key not found. Please set it in the .env file.")

# Expose the key to libraries (langchain-google-genai) that read it from the
# process environment.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
# Streamlit app configuration (must run before any other st.* call).
st.set_page_config(page_title="College Data Chatbot", layout="centered")
st.title("PreCollege Chatbot GEMINI+ HuggingFace Embeddings")

# Initialize the Gemini chat model (reads GOOGLE_API_KEY from the environment).
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    temperature=0.2,  # Slightly higher for varied responses
    max_tokens=None,  # no explicit cap on output tokens
    timeout=None,     # no request timeout
    max_retries=2,
)

# Local sentence-transformers embeddings (HuggingFace, not OpenAI — older
# comments in this file referring to OpenAI are stale copy-paste).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
def load_preprocessed_vectorstore():
    """Load the college-data .docx, chunk it, and build a Chroma vector store.

    Returns the populated Chroma store, or None on failure (the error is
    surfaced in the Streamlit UI instead of crashing the app).
    """
    try:
        raw_docs = Docx2txtLoader("./Updated_structred_aman.docx").load()

        splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n", "\n", ". ", " ", ""],
            chunk_size=3000,
            chunk_overlap=1000,
        )
        chunks = splitter.split_documents(raw_docs)

        # NOTE(review): this is invoked on every Streamlit rerun; consider
        # @st.cache_resource so embeddings are computed once per session.
        return Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory="./data32",
        )
    except Exception as e:
        st.error(f"Error creating vector store: {e}")
        return None
def get_context_retriever_chain(vector_store):
    """Build a history-aware retriever: condenses the conversation plus the
    latest question into a standalone query before hitting the vector store."""
    base_retriever = vector_store.as_retriever()

    # Prompt used only for question reformulation, not for answering.
    condense_prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        ("system", """Given the chat history and the latest user question, which might reference context in the chat history,
        formulate a standalone question that can be understood without the chat history.
        If the question is directly addressed within the provided document, provide a relevant answer.
        If the question is not explicitly addressed in the document, return the following message:
        'This question is beyond the scope of the available information. Please contact your mentor for further assistance.'
        Do NOT answer the question directly, just reformulate it if needed and otherwise return it as is."""),
    ])

    return create_history_aware_retriever(llm, base_retriever, condense_prompt)
def get_conversational_chain(retriever_chain):
    """Assemble the full RAG chain: history-aware retrieval followed by a
    stuff-documents answer-generation step grounded in "{context}"."""
    answer_prompt = ChatPromptTemplate.from_messages([
        ("system", """Hello! I'm your PreCollege AI assistant, here to help you with your JEE Mains journey.
        Please provide your JEE Mains rank and preferred engineering branches or colleges,
        and I'll give you tailored advice based on our verified database.
        Note: I will only provide information that is available within our database to ensure accuracy. Let's get started!
        """
        "\n\n"
        "{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ])

    docs_chain = create_stuff_documents_chain(llm, answer_prompt)
    return create_retrieval_chain(retriever_chain, docs_chain)
def get_response(user_query):
    """Run the RAG pipeline for one user query and return the answer text.

    Parameters
    ----------
    user_query : str
        The raw question typed by the user.

    Returns
    -------
    str
        The generated answer from the retrieval chain.
    """
    retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
    conversation_rag_chain = get_conversational_chain(retriever_chain)

    # Session chat history is stored as plain dicts ({"author", "content"}),
    # so convert them into LangChain message objects for MessagesPlaceholder.
    # BUG FIX: the previous isinstance(HumanMessage/SystemMessage) checks never
    # matched dicts, so the history passed to the chain was always empty.
    formatted_chat_history = []
    for message in st.session_state.chat_history:
        if message["author"] == "user":
            formatted_chat_history.append(HumanMessage(content=message["content"]))
        else:
            formatted_chat_history.append(SystemMessage(content=message["content"]))

    response = conversation_rag_chain.invoke({
        "chat_history": formatted_chat_history,
        "input": user_query,
    })
    return response['answer']
# Build the vector store once per session. Unconditionally reloading on every
# Streamlit rerun (the previous behavior) re-embedded the entire document on
# each user interaction; retry only while it is still missing/None.
if st.session_state.get("vector_store") is None:
    st.session_state.vector_store = load_preprocessed_vectorstore()

# Initialize chat history if not present.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = [
        {"author": "assistant", "content": "Hello, I am Precollege. How can I help you?"}
    ]

# Main app logic
if st.session_state.get("vector_store") is None:
    # BUG FIX: message previously pointed at './data', but the store is
    # persisted to './data32' (see load_preprocessed_vectorstore).
    st.error("Failed to load preprocessed data. Please ensure the data exists in './data32' directory.")
else:
    # Display chat history.
    with st.container():
        for message in st.session_state.chat_history:
            if message["author"] == "assistant":
                with st.chat_message("system"):
                    st.write(message["content"])
            elif message["author"] == "user":
                with st.chat_message("human"):
                    st.write(message["content"])

    # User input box below the chat; clear_on_submit resets the field.
    with st.container():
        with st.form(key="chat_form", clear_on_submit=True):
            user_query = st.text_input("Type your message here...", key="user_input")
            submit_button = st.form_submit_button("Send")

        if submit_button and user_query:
            # Get bot response, record both turns, then rerun to refresh UI.
            response = get_response(user_query)
            st.session_state.chat_history.append({"author": "user", "content": user_query})
            st.session_state.chat_history.append({"author": "assistant", "content": response})
            st.rerun()