Spaces:

Parthiban97
/

Chat_With_IPYNB_Files

Sleeping

App Files Files Community

Parthiban97 commited on Jul 29, 2024

Commit

dd7f3a3

verified ·

1 Parent(s): c111a70

Update app.py

Browse files

Files changed (1) hide show

app.py +149 -154

app.py CHANGED Viewed

@@ -1,155 +1,150 @@
-import streamlit as st
-import os
-import tempfile
-import time
-import nbformat
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.chains.combine_documents import create_stuff_documents_chain
-from langchain_core.prompts import ChatPromptTemplate
-from langchain.chains import create_retrieval_chain
-from langchain_community.vectorstores import FAISS
-from langchain_google_genai import GoogleGenerativeAIEmbeddings
-from dotenv import load_dotenv
-from langchain_core.documents import Document
-load_dotenv()
-st.set_page_config(page_title="Chat with Notebooks", page_icon=":books:")
-st.title("Chat Gemini Document Q&A with Jupyter Notebooks")
-# Custom prompt template
-custom_context_input = """
-<context>
-{context}
-</context>
-Questions:{input}
-"""
-# Default prompt template
-default_prompt_template = """
-Answer the questions based on the provided context only.
-Please provide the most accurate response based on the question
-<context>
-{context}
-</context>
-Questions:{input}
-"""
-def load_notebook(file_path):
-    with open(file_path, 'r', encoding='utf-8') as f:
-        notebook = nbformat.read(f, as_version=4)
-    return notebook
-def extract_text_from_notebook(notebook):
-    text = []
-    for cell in notebook.cells:
-        if cell.cell_type == 'markdown':
-            text.append(cell.source)
-        elif cell.cell_type == 'code':
-            text.append(cell.source)
-            if 'outputs' in cell:
-                for output in cell.outputs:
-                    if output.output_type == 'stream':
-                        text.append(output.text)
-                    elif output.output_type == 'execute_result' and 'data' in output:
-                        text.append(output.data.get('text/plain', ''))
-    return "\n".join(text)
-def vector_embedding(ipynb_files):
-    if "vectors" not in st.session_state:
-        st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
-    documents = []
-    for ipynb_file in ipynb_files:
-        # Save the uploaded file to a temporary location
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".ipynb") as tmp_file:
-            tmp_file.write(ipynb_file.getvalue())
-            tmp_file_path = tmp_file.name
-        # Load the .ipynb file from the temporary file path
-        notebook = load_notebook(tmp_file_path)
-        text = extract_text_from_notebook(notebook)
-        # Create a Document object instead of using plain text
-        documents.append(Document(page_content=text))
-        # Remove the temporary file
-        os.remove(tmp_file_path)
-    # Ensure documents are properly segmented or chunked
-    st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
-    try:
-        segmented_documents = st.session_state.text_splitter.split_documents(documents)
-        st.session_state.final_documents = segmented_documents
-        if st.session_state.final_documents:
-            # Embedding using FAISS
-            st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
-            st.success("Document embedding is completed!")
-        else:
-            st.warning("No documents found to embed.")
-    except Exception as e:
-        st.error(f"Error splitting or embedding documents: {str(e)}")
-        st.session_state.final_documents = []  # Handle empty documents or retry
-# Define model options for Gemini
-model_options = [
-  "models/gemini-1.0-pro",
-    "models/gemini-1.0-pro-001",
-    "models/gemini-1.0-pro-latest",
-    "models/gemini-1.0-pro-vision-latest",
-    "models/gemini-1.5-flash-latest",
-    "models/gemini-1.5-pro-latest",
-    "models/gemini-pro",
-    "models/gemini-pro-vision"
-]
-# Sidebar elements
-with st.sidebar:
-    st.header("Configuration")
-    st.markdown("Enter your API key below:")
-    google_api_key = st.text_input("Enter your Google API Key", type="password", help="Get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey)")
-    selected_model = st.selectbox("Select Gemini Model", model_options)
-    os.environ["GOOGLE_API_KEY"] = str(google_api_key)
-    st.markdown("Upload your .ipynb files:")
-    uploaded_files = st.file_uploader("Choose .ipynb files", accept_multiple_files=True, type="ipynb")
-    # Custom prompt text areas
-    st.markdown("Enter a custom prompt template (optional):")
-    custom_prompt_template = st.text_area("Custom Prompt Template", placeholder="Enter your custom prompt here...")
-    if st.button("Start Document Embedding"):
-        if uploaded_files:
-            vector_embedding(uploaded_files)
-            st.success("Vector Store DB is Ready")
-        else:
-            st.warning("Please upload at least one .ipynb file.")
-# Main section for question input and results
-prompt1 = st.text_area("Enter Your Question From Documents")
-if prompt1 and "vectors" in st.session_state:
-    if custom_prompt_template:
-        custom_prompt = custom_prompt_template + custom_context_input
-        prompt = ChatPromptTemplate.from_template(custom_prompt)
-    else:
-        prompt = ChatPromptTemplate.from_template(default_prompt_template)
-    llm = ChatGoogleGenerativeAI(model=selected_model, temperature=0.3)
-    document_chain = create_stuff_documents_chain(llm, prompt)
-    retriever = st.session_state.vectors.as_retriever()
-    retrieval_chain = create_retrieval_chain(retriever, document_chain)
-    start = time.process_time()
-    response = retrieval_chain.invoke({'input': prompt1})
-    st.write("Response time:", time.process_time() - start)
-    st.write(response['answer'])
-    # With a Streamlit expander
-    with st.expander("Document Similarity Search"):
-        # Find the relevant chunks
-        for i, doc in enumerate(response["context"]):
-            st.write(doc.page_content)
             st.write("--------------------------------")

+import streamlit as st
+import os
+import tempfile
+import time
+import nbformat
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.chains import create_retrieval_chain
+from langchain_community.vectorstores import FAISS
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from dotenv import load_dotenv
+from langchain_core.documents import Document
+load_dotenv()
+st.set_page_config(page_title="Chat with Notebooks", page_icon=":books:")
+st.title("Chat Gemini Document Q&A with Jupyter Notebooks")
+# Custom prompt template
+custom_context_input = """
+<context>
+{context}
+</context>
+Questions:{input}
+"""
+# Default prompt template
+default_prompt_template = """
+Answer the questions based on the provided context only.
+Please provide the most accurate response based on the question
+<context>
+{context}
+</context>
+Questions:{input}
+"""
+def load_notebook(file_path):
+    with open(file_path, 'r', encoding='utf-8') as f:
+        notebook = nbformat.read(f, as_version=4)
+    return notebook
+def extract_text_from_notebook(notebook):
+    text = []
+    for cell in notebook.cells:
+        if cell.cell_type == 'markdown':
+            text.append(cell.source)
+        elif cell.cell_type == 'code':
+            text.append(cell.source)
+            if 'outputs' in cell:
+                for output in cell.outputs:
+                    if output.output_type == 'stream':
+                        text.append(output.text)
+                    elif output.output_type == 'execute_result' and 'data' in output:
+                        text.append(output.data.get('text/plain', ''))
+    return "\n".join(text)
+def vector_embedding(ipynb_files):
+    if "vectors" not in st.session_state:
+        st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    documents = []
+    for ipynb_file in ipynb_files:
+        # Save the uploaded file to a temporary location
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".ipynb") as tmp_file:
+            tmp_file.write(ipynb_file.getvalue())
+            tmp_file_path = tmp_file.name
+        # Load the .ipynb file from the temporary file path
+        notebook = load_notebook(tmp_file_path)
+        text = extract_text_from_notebook(notebook)
+        # Create a Document object instead of using plain text
+        documents.append(Document(page_content=text))
+        # Remove the temporary file
+        os.remove(tmp_file_path)
+    # Ensure documents are properly segmented or chunked
+    st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+    try:
+        segmented_documents = st.session_state.text_splitter.split_documents(documents)
+        st.session_state.final_documents = segmented_documents
+        if st.session_state.final_documents:
+            # Embedding using FAISS
+            st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
+            st.success("Document embedding is completed!")
+        else:
+            st.warning("No documents found to embed.")
+    except Exception as e:
+        st.error(f"Error splitting or embedding documents: {str(e)}")
+        st.session_state.final_documents = []  # Handle empty documents or retry
+# Define model options for Gemini
+model_options = [
+  "gemini-1.5-flash",
+  "gemini-1.5-pro",
+  "gemini-1.0-pro"
+]
+# Sidebar elements
+with st.sidebar:
+    st.header("Configuration")
+    st.markdown("Enter your API key below:")
+    google_api_key = st.text_input("Enter your Google API Key", type="password", help="Get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey)")
+    selected_model = st.selectbox("Select Gemini Model", model_options)
+    os.environ["GOOGLE_API_KEY"] = str(google_api_key)
+    st.markdown("Upload your .ipynb files:")
+    uploaded_files = st.file_uploader("Choose .ipynb files", accept_multiple_files=True, type="ipynb")
+    # Custom prompt text areas
+    st.markdown("Enter a custom prompt template (optional):")
+    custom_prompt_template = st.text_area("Custom Prompt Template", placeholder="Enter your custom prompt here...")
+    if st.button("Start Document Embedding"):
+        if uploaded_files:
+            vector_embedding(uploaded_files)
+            st.success("Vector Store DB is Ready")
+        else:
+            st.warning("Please upload at least one .ipynb file.")
+# Main section for question input and results
+prompt1 = st.text_area("Enter Your Question From Documents")
+if prompt1 and "vectors" in st.session_state:
+    if custom_prompt_template:
+        custom_prompt = custom_prompt_template + custom_context_input
+        prompt = ChatPromptTemplate.from_template(custom_prompt)
+    else:
+        prompt = ChatPromptTemplate.from_template(default_prompt_template)
+    llm = ChatGoogleGenerativeAI(model=selected_model, temperature=0.3)
+    document_chain = create_stuff_documents_chain(llm, prompt)
+    retriever = st.session_state.vectors.as_retriever()
+    retrieval_chain = create_retrieval_chain(retriever, document_chain)
+    start = time.process_time()
+    response = retrieval_chain.invoke({'input': prompt1})
+    st.write("Response time:", time.process_time() - start)
+    st.write(response['answer'])
+    # With a Streamlit expander
+    with st.expander("Document Similarity Search"):
+        # Find the relevant chunks
+        for i, doc in enumerate(response["context"]):
+            st.write(doc.page_content)
             st.write("--------------------------------")