Spaces:
Sleeping
Sleeping
| import logging | |
| import sys | |
| import tempfile | |
| import streamlit as st | |
| import yaml | |
| import copy | |
| from menu import display_pages_menu, display_config | |
| from pypdf import PdfReader | |
| from utils import get_pdf_iframe, set_state, generate_assets | |
| from country_by_country.processor import ReportProcessor | |
# Route log records to stdout and print only the message text
# (the format string carries no timestamp or level prefix).
logging.basicConfig(
    format="%(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)
def set_page_filter(value: dict):
    """Hand ``value`` to ``set_state`` under the key path config -> pagefilter.

    Args:
        value: The page filter configuration to record.
    """
    state_path = ["config", "pagefilter"]
    set_state(state_path, value)
def initiate_configuration() -> None:
    """Rebuild the working configuration from ``initial_config``.

    Writes two session-state entries:

    * ``config``: a deep copy of ``initial_config``; when its ``pagefilter``
      entry is a list, it is collapsed to the first element of that list.
    * ``selected_page_filter_name``: the ``type`` of the resulting page
      filter.

    Raises:
        KeyError: If ``initial_config`` or its ``pagefilter`` / ``type``
            entries are missing.
        IndexError: If ``pagefilter`` is an empty list.
    """
    config = copy.deepcopy(st.session_state["initial_config"])
    st.session_state["config"] = config

    if isinstance(config["pagefilter"], list):
        # Take the first filter from the *copy*, not from ``initial_config``:
        # otherwise the working config would share a dict with the pristine
        # one and edits to it would leak back, defeating the deep copy.
        config["pagefilter"] = config["pagefilter"][0]

    st.session_state["selected_page_filter_name"] = config["pagefilter"]["type"]
def on_pdf_file_upload() -> None:
    """Callback of the PDF uploader widget (session key ``original_pdf``).

    Copies the uploaded bytes into the module-level temporary file, records
    that file and the original file name in the session state, triggers
    ``generate_assets`` and requests a redirection to the selected-pages
    page.

    Does nothing when the uploader holds no file.
    """
    uploaded_pdf = st.session_state.original_pdf
    if uploaded_pdf is None:
        # The widget's on_change also fires when the user removes the file;
        # there is nothing to read in that case.
        return

    # Change states related to the pdf file upload
    mytmpfile.write(uploaded_pdf.read())
    # The file is later consumed through its path (``.name``), so make sure
    # the buffered bytes are actually on disk before anything opens it.
    mytmpfile.flush()
    st.session_state["working_file_pdf"] = mytmpfile
    st.session_state["original_pdf_name"] = uploaded_pdf.name

    # Generate assets
    generate_assets()

    # Picked up by the top-level redirection check on the next script run.
    st.session_state["page_redirection"] = "pages/1_Selected_Pages.py"
def on_config_file_upload() -> None:
    """Promote the uploaded config to ``initial_config`` and rebuild ``config``.

    The value stored under the ``initial_uploaded_config`` session key is
    copied as-is into ``initial_config`` before ``initiate_configuration``
    runs.
    """
    # NOTE(review): the value is stored without parsing; presumably it is the
    # uploader's file object rather than a dict -- confirm before wiring this
    # callback to a widget.
    uploaded_config = st.session_state["initial_uploaded_config"]
    st.session_state["initial_config"] = uploaded_config
    initiate_configuration()
def on_change_page_filter(name_to_filter_dict: dict) -> None:
    """Callback of the page-filter radio button.

    Args:
        name_to_filter_dict: Mapping from a page filter name to the filter
            configuration to apply when that name is selected.
    """
    chosen_name = st.session_state["radio_button_filter_selection"]
    # Keep our own copy of the selection: selectors wipe their key on reload.
    st.session_state["selected_page_filter_name"] = chosen_name
    set_page_filter(name_to_filter_dict[chosen_name])
# Honor a pending redirection request, if any.
# Callbacks cannot call st.switch_page themselves, so they leave the target
# page in the session state and the switch happens here instead.
redirect_target = st.session_state.get("page_redirection", False)
if redirect_target:
    # Reset the request first so it is consumed only once.
    st.session_state["page_redirection"] = False
    st.switch_page(redirect_target)
# Page chrome: wide layout, title, subtitle and the navigation menu.
st.set_page_config(page_title="Accueil - upload de PDF", layout="wide")
st.title("Country by Country Tax Reporting analysis")
st.subheader(
    "This app will help you extract a table containing financial information from a pdf",
)
display_pages_menu()

# Scratch file that the PDF upload callback fills with the uploaded bytes.
# NOTE(review): created on every execution of this script and never deleted
# (delete=False) -- unused files accumulate; confirm whether that is intended.
mytmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
# State initialization: runs until it has completed once for this session.
if "first_time" not in st.session_state:
    logging.info("State initialization...")

    logging.info("... loading default extract config")
    # Explicit encoding so the result does not depend on the platform locale.
    with open("extract_config.yaml", "r", encoding="utf-8") as f:
        st.session_state["initial_config"] = yaml.safe_load(f)
    initiate_configuration()

    logging.info("... initializing processor and assets")
    st.session_state["proc"] = ReportProcessor(st.session_state["config"])
    st.session_state["assets"] = {
        "pagefilter": {},
        "table_extractors": [],
    }

    # Set the marker only once everything above succeeded; if loading fails,
    # the next run retries instead of continuing with a half-built state.
    st.session_state["first_time"] = False
# Sidebar: PDF upload, optional custom configuration and page filter choice.
with st.sidebar:
    st.markdown("# PDF Upload")

    st.markdown("## PDF Report to process")
    original_pdf = st.file_uploader(
        "Upload a pdf document containing financial table : ",
        key="original_pdf",
        on_change=on_pdf_file_upload,
    )

    if "original_pdf_name" in st.session_state:
        st.markdown(
            "Already loaded file : " + st.session_state["original_pdf_name"],
        )

    st.markdown("# Configuration:\n")
    # Upload personalized config if required
    loaded_config = st.file_uploader(
        "Upload a config if the default config doesn't suit you :",
        key="initial_uploaded_config",
        on_change=initiate_configuration,
    )

    # Stays None unless the upload passes every check below.
    loaded_config_dict = None
    if loaded_config is not None:
        if not loaded_config.name.endswith(".yaml"):
            st.error("Please upload a yaml file")
            loaded_config = None
        else:
            # Only parse files that passed the extension check; parsing after
            # a rejection would feed None to the YAML loader.
            try:
                parsed_config = yaml.safe_load(loaded_config)
            except yaml.YAMLError:
                st.error("Unable to load yaml file config")
                loaded_config = None
            else:
                # Valid YAML is not necessarily a mapping (it may be a list,
                # a scalar or empty), so check the type before using .get().
                if (
                    isinstance(parsed_config, dict)
                    and parsed_config.get("pagefilter", False)
                    and parsed_config.get("table_extraction", False)
                ):
                    loaded_config_dict = parsed_config
                else:
                    st.error("Please upload a valid config file")
                    loaded_config = None

    # Extract config
    if loaded_config_dict is not None:
        st.session_state["initial_config"] = loaded_config_dict
        st.session_state["config"] = copy.deepcopy(st.session_state["initial_config"])

    # Set page filter
    available_page_filters = st.session_state["initial_config"]["pagefilter"]
    if isinstance(available_page_filters, dict):
        # A single filter may be given directly instead of as a list
        # (initiate_configuration accepts both shapes).
        available_page_filters = [available_page_filters]
    page_filter_name_to_config_mapping = {
        pagefilter["type"]: pagefilter for pagefilter in available_page_filters
    }
    page_filter_list = list(page_filter_name_to_config_mapping)

    if st.session_state["selected_page_filter_name"] not in page_filter_list:
        # The remembered name may not exist in a freshly loaded config;
        # fall back to the first available filter instead of failing.
        st.session_state["selected_page_filter_name"] = page_filter_list[0]
    current_selected_page_filter_index = page_filter_list.index(
        st.session_state["selected_page_filter_name"]
    )

    selected_page_filter_name = st.radio(
        "Page filter",
        page_filter_list,
        index=current_selected_page_filter_index,
        on_change=on_change_page_filter,
        key="radio_button_filter_selection",
        args=(page_filter_name_to_config_mapping,),
    )

    # NOTE(review): the original indentation of this call was lost; it is
    # assumed to belong to the sidebar block -- confirm.
    display_config()
# The upload callback stores the temporary copy of the PDF under the
# "working_file_pdf" session key, so this part only runs once a PDF has
# been uploaded.
if "working_file_pdf" in st.session_state:
    working_pdf_path = st.session_state["working_file_pdf"].name
    # Display the uploaded pdf; the helper returns markup, hence the
    # unsafe_allow_html flag.
    pdf_markup = get_pdf_iframe(working_pdf_path)
    st.markdown(pdf_markup, unsafe_allow_html=True)