# ChatWithYourData_v1.py
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
import os
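
# NOTE: these import paths assume a pre-0.1.x "langchain" release; on newer
# versions the same classes are expected to live in the langchain_community /
# langchain_openai packages instead (an assumption about the installed version)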
# Chat UI title
st.header("Upload your own files and ask questions like ChatGPT")
st.subheader('File type supported: PDF/DOCX/TXT :city_sunrise:')
# File uploader in the sidebar on the left
with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", type="password")
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()
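    # st.stop() above ends the current rerun, so nothing below executes until
    # an API key has been entered in the sidebar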
# Set OPENAI_API_KEY as an environment variable
os.environ["OPENAI_API_KEY"] = openai_api_key
# Initialize the chat model (temperature 0 for deterministic answers)
llm = ChatOpenAI(temperature=0, max_tokens=1000, model_name="gpt-3.5-turbo", streaming=True)
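# NOTE: streaming=True alone does not stream tokens to the UI; that would also
# require attaching a callback handler, which this app does not do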
# Load version history from the text file
def load_version_history():
    with open("version_history.txt", "r") as file:
        return file.read()
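
# (Assumes a version_history.txt file exists next to the script; the sidebar
# call below would otherwise raise FileNotFoundError on startup.)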
with st.sidebar:
    uploaded_files = st.file_uploader("Please upload your files", accept_multiple_files=True, type=None)
    st.info(load_version_history(), icon="🤖")
    st.info("Please refresh the browser if you decide to upload more files, so the session resets", icon="🚨")
# Check if files are uploaded
if uploaded_files:
    # Print the number of files to console
    print(f"Number of files uploaded: {len(uploaded_files)}")

    # Load the data and preprocess only if it hasn't been done in this session
    if "processed_data" not in st.session_state:
        # Load the data from the uploaded files
        documents = []
        for uploaded_file in uploaded_files:
            # Get the full file path of the uploaded file
            file_path = os.path.join(os.getcwd(), uploaded_file.name)

            # Save the uploaded file to disk so the loader can read it
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getvalue())

            # Use UnstructuredFileLoader to load the file (handles PDF/DOCX/TXT)
            loader = UnstructuredFileLoader(file_path)
            loaded_documents = loader.load()
            print(f"Number of documents loaded from {uploaded_file.name}: {len(loaded_documents)}")

            # Extend the main documents list with the loaded documents
            documents.extend(loaded_documents)

        # Chunk the data, create embeddings, and save them in the vectorstore
        text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
        document_chunks = text_splitter.split_documents(documents)

        # Chroma is in-memory here (no persist_directory), so embeddings are
        # recomputed whenever the session restarts
        embeddings = OpenAIEmbeddings()
        vectorstore = Chroma.from_documents(document_chunks, embeddings)

        # Store the processed data in session state for reuse
        st.session_state.processed_data = {
            "document_chunks": document_chunks,
            "vectorstore": vectorstore,
        }

        # Print the total number of chunks to console
        print(f"Number of total chunks: {len(document_chunks)}")
    else:
        # If the processed data is already available, retrieve it from session state
        document_chunks = st.session_state.processed_data["document_chunks"]
        vectorstore = st.session_state.processed_data["vectorstore"]
    # Initialize LangChain's ConversationalRetrievalChain with the vectorstore as retriever
    qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever())
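    # The chain condenses the new question plus the chat history into a
    # standalone query, retrieves the closest chunks from Chroma, then answers
    # from those chunks (default prompts; nothing is customized here)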
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # Accept user input
    if prompt := st.chat_input("Ask your questions?"):
        # Add the user message to chat history and render it
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Rebuild the history as (user, assistant) pairs, the format
        # ConversationalRetrievalChain expects; the prompt just appended is the
        # odd message out and is passed separately as the question
        chat_history = [
            (st.session_state.messages[i]["content"], st.session_state.messages[i + 1]["content"])
            for i in range(0, len(st.session_state.messages) - 1, 2)
        ]

        # Query the assistant using the latest chat history
        result = qa({"question": prompt, "chat_history": chat_history})

        # Display the assistant response (the chain returns the full answer at
        # once, so there is no token-by-token streaming to render)
        with st.chat_message("assistant"):
            full_response = result["answer"]
            st.markdown(full_response)
        print(full_response)

        st.session_state.messages.append({"role": "assistant", "content": full_response})
else:
    st.write("Please upload your files.")
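
# To run the app locally with the standard Streamlit CLI:
#   streamlit run ChatWithYourData_v1.py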