Spaces:

Parthiban97
/

Chat_With_IPYNB_Files

Sleeping

App Files Files Community

Chat_With_IPYNB_Files / app.py

Parthiban97

Update app.py

6f044cf verified over 1 year ago

raw

history blame

5.69 kB

	import streamlit as st
	import os
	import tempfile
	import time
	import nbformat
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.chains.combine_documents import create_stuff_documents_chain
	from langchain_core.prompts import ChatPromptTemplate
	from langchain.chains import create_retrieval_chain
	from langchain_community.vectorstores import FAISS
	from langchain_google_genai import GoogleGenerativeAIEmbeddings
	from dotenv import load_dotenv
	from langchain_core.documents import Document

	load_dotenv()

	st.set_page_config(page_title="Chat with Notebooks", page_icon=":books:")

	st.title("Chat Gemini Document Q&A with Jupyter Notebooks")

	# Custom prompt template
	custom_context_input = """
	<context>
	{context}
	</context>
	Questions:{input}
	"""

	# Default prompt template
	default_prompt_template = """
	Answer the questions based on the provided context only.
	Please provide the most accurate response based on the question
	<context>
	{context}
	</context>
	Questions:{input}
	"""

	def load_notebook(file_path):
	with open(file_path, 'r', encoding='utf-8') as f:
	notebook = nbformat.read(f, as_version=4)
	return notebook

	def extract_text_from_notebook(notebook):
	text = []
	for cell in notebook.cells:
	if cell.cell_type == 'markdown':
	text.append(cell.source)
	elif cell.cell_type == 'code':
	text.append(cell.source)
	if 'outputs' in cell:
	for output in cell.outputs:
	if output.output_type == 'stream':
	text.append(output.text)
	elif output.output_type == 'execute_result' and 'data' in output:
	text.append(output.data.get('text/plain', ''))
	return "\n".join(text)

	def vector_embedding(ipynb_files):
	if "vectors" not in st.session_state:
	st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

	documents = []
	for ipynb_file in ipynb_files:
	# Save the uploaded file to a temporary location
	with tempfile.NamedTemporaryFile(delete=False, suffix=".ipynb") as tmp_file:
	tmp_file.write(ipynb_file.getvalue())
	tmp_file_path = tmp_file.name

	# Load the .ipynb file from the temporary file path
	notebook = load_notebook(tmp_file_path)
	text = extract_text_from_notebook(notebook)
	# Create a Document object instead of using plain text
	documents.append(Document(page_content=text))

	# Remove the temporary file
	os.remove(tmp_file_path)

	# Ensure documents are properly segmented or chunked
	st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
	try:
	segmented_documents = st.session_state.text_splitter.split_documents(documents)
	st.session_state.final_documents = segmented_documents

	if st.session_state.final_documents:
	# Embedding using FAISS
	st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
	st.success("Document embedding is completed!")
	else:
	st.warning("No documents found to embed.")

	except Exception as e:
	st.error(f"Error splitting or embedding documents: {str(e)}")
	st.session_state.final_documents = [] # Handle empty documents or retry

	# Define model options for Gemini
	model_options = [
	"gemini-1.5-flash",
	"gemini-1.5-pro",
	"gemini-1.0-pro"
	]

	# Sidebar elements
	with st.sidebar:
	st.header("Configuration")
	st.markdown("Enter your API key below:")
	google_api_key = st.text_input("Enter your Google API Key", type="password", help="Get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey)")
	selected_model = st.selectbox("Select Gemini Model", model_options)
	os.environ["GOOGLE_API_KEY"] = str(google_api_key)

	st.markdown("Upload your .ipynb files:")
	uploaded_files = st.file_uploader("Choose .ipynb files", accept_multiple_files=True, type="ipynb")

	# Custom prompt text areas
	custom_prompt_template = st.text_area("Custom Prompt Template", placeholder="Enter your custom prompt here...(optional)")

	if st.button("Start Document Embedding"):
	if uploaded_files:
	vector_embedding(uploaded_files)
	st.success("Vector Store DB is Ready")
	else:
	st.warning("Please upload at least one .ipynb file.")

	# Main section for question input and results
	prompt1 = st.text_area("Enter Your Question From Documents")

	if prompt1 and "vectors" in st.session_state:
	if custom_prompt_template:
	custom_prompt = custom_prompt_template + custom_context_input
	prompt = ChatPromptTemplate.from_template(custom_prompt)
	else:
	prompt = ChatPromptTemplate.from_template(default_prompt_template)

	llm = ChatGoogleGenerativeAI(model=selected_model, temperature=0.3)
	document_chain = create_stuff_documents_chain(llm, prompt)
	retriever = st.session_state.vectors.as_retriever()
	retrieval_chain = create_retrieval_chain(retriever, document_chain)
	start = time.process_time()
	response = retrieval_chain.invoke({'input': prompt1})
	st.write("Response time:", time.process_time() - start)
	st.write(response['answer'])

	# With a Streamlit expander
	with st.expander("Document Similarity Search"):
	# Find the relevant chunks
	for i, doc in enumerate(response["context"]):
	st.write(doc.page_content)
	st.write("--------------------------------")