Spaces:
Running
Running
Chest Pain
v1.9: new DOB Stalled tab - paginate full 1.4M dataset, dedupe by complaint_number, 18-month filter on complaint date
ee15e2d
| # gui.py - YOUR ORIGINAL FILE + STALLED TAB FIXED - 402 LINES EXACT | |
| from __future__ import annotations | |
| import os | |
| import io | |
| import json | |
| import base64 | |
| import time | |
| from typing import Optional, Tuple | |
| import pandas as pd | |
| import gradio as gr | |
| from gradio.themes import Soft, colors | |
| from config import ( | |
| APP_NAME, | |
| APP_VERSION, | |
| DEFAULT_PAGE_SIZE, | |
| DEFAULT_DAYS_WINDOW, | |
| DEFAULT_VISIBLE_COLUMNS, | |
| DATASETS, | |
| ALLOWED_BOROUGHS, | |
| ) | |
| from services.data import SocrataClient | |
| from utils import pick_existing_columns, export_csv | |
# ---- UI density (comfortable / compact via env) ----
# "compact" tightens font size, line height and table-cell padding;
# any other value falls back to the comfortable defaults (see _build_font_css).
UI_DENSITY = os.getenv("UI_DENSITY", "comfortable").strip().lower()
# ---- App header ----
HEADER_TITLE = f"{APP_NAME} v{APP_VERSION}"
HEADER_SUB = "NYC DOB sales-leads explorer"
# ---- Persist visible columns per dataset on disk ----
# JSON file in the user's home dir, mapping dataset key -> list of visible columns.
PREFS_PATH = os.path.join(os.path.expanduser("~"), ".buildscout_prefs.json")
def _load_prefs() -> dict:
    """Load the per-dataset column preferences from PREFS_PATH.

    Returns an empty dict when the file is missing, unreadable, or does not
    contain a JSON object. The shape check matters: callers index the result
    with dataset keys (``prefs[key] = ...``), so a prefs file that was
    corrupted into, say, a JSON list must not be propagated as-is.
    """
    try:
        with open(PREFS_PATH, "r", encoding="utf-8") as f:
            prefs = json.load(f)
    except Exception:
        # Best-effort: prefs are a convenience, never a hard requirement.
        return {}
    # Valid JSON but wrong top-level type -> treat as "no prefs".
    return prefs if isinstance(prefs, dict) else {}
def _save_prefs(prefs: dict) -> None:
    """Persist *prefs* to PREFS_PATH as indented, key-sorted JSON.

    Failures (read-only home dir, unserializable content, ...) are silently
    swallowed: preferences are a convenience, not critical state.
    """
    try:
        serialized = json.dumps(prefs, indent=2, sort_keys=True)
        with open(PREFS_PATH, "w", encoding="utf-8") as fh:
            fh.write(serialized)
    except Exception:
        pass
# Single shared Socrata API client, reused by every UI callback below.
_client = SocrataClient()
| # ---------- Typography (Aptos if present, otherwise Helvetica stack) ---------- | |
| def _read_font_file_bytes() -> Optional[bytes]: | |
| env_path = os.getenv("APTOS_WOFF2", "").strip() | |
| candidates = [env_path] if env_path else [] | |
| candidates += [ | |
| "aptos.woff2", | |
| "assets/aptos.woff2", | |
| "assets/fonts/aptos.woff2", | |
| "static/aptos.woff2", | |
| "static/fonts/aptos.woff2", | |
| ] | |
| for p in candidates: | |
| if p and os.path.exists(p) and os.path.isfile(p): | |
| try: | |
| with open(p, "rb") as f: | |
| return f.read() | |
| except Exception: | |
| pass | |
| return None | |
def _build_font_css() -> str:
    """Build the app-wide CSS string injected into gr.Blocks(css=...).

    Consists of an optional @font-face rule (when an Aptos woff2 file is
    found, embedded inline as a base64 data: URI so no static hosting is
    needed), followed by typography and Gradio-dataframe rules whose sizes
    depend on the UI_DENSITY module setting.
    """
    font_bytes = _read_font_file_bytes()
    font_face = ""
    # Fallback stack used when no Aptos font file is available.
    family = "'Helvetica Neue', Helvetica, Arial, sans-serif"
    if font_bytes:
        b64 = base64.b64encode(font_bytes).decode("ascii")
        font_face = f"""
@font-face {{
    font-family: 'Aptos';
    src: url(data:font/woff2;base64,{b64}) format('woff2');
    font-weight: 300 900;
    font-style: normal;
    font-display: swap;
}}
"""
        # Prefer Aptos, keep the Helvetica stack as fallback.
        family = "'Aptos', 'Helvetica Neue', Helvetica, Arial, sans-serif"
    # Density knobs: base font size, line height, and table-cell padding.
    if UI_DENSITY == "compact":
        base_size = "14.2px"
        line_h = "1.28"
        cell_py = "6px"
        cell_px = "10px"
    else:  # comfortable
        base_size = "15.6px"
        line_h = "1.38"
        cell_py = "9px"
        cell_px = "14px"
    return font_face + f"""
:root, body, input, button, textarea, select, .gradio-container {{
    font-family: {family} !important;
    font-size: {base_size};
    line-height: {line_h};
    letter-spacing: 0.01em;
    -webkit-font-smoothing: antialiased;
    -moz-osx-font-smoothing: grayscale;
}}
label, .label, .wrap > label, .wrap .label {{ font-weight: 600; }}
th, thead tr {{ font-weight: 700; }}
/* Gradio dataframe tweaks */
.dataframe table {{
    border-collapse: separate;
    border-spacing: 0;
}}
.dataframe table th,
.dataframe table td {{
    padding: {cell_py} {cell_px};
    white-space: nowrap;
}}
/* keep zebra readable on dark */
.dataframe table tbody tr:nth-child(even) td {{
    background: color-mix(in oklab, var(--neutral-800), white 4%);
}}
button.primary, .gr-button.primary {{ font-weight: 700; }}
/* Make the top controls breathe but align nicely */
.controls-col {{
    display: flex;
    flex-direction: column;
    gap: 10px;
}}
"""
# Built once at import time; passed to gr.Blocks(css=...) in create_app.
CUSTOM_CSS = _build_font_css()
| # ---- helpers ---- | |
| def _sanitize_visible(visible: list[str], cols: list[str]) -> list[str]: | |
| set_cols = set(cols) | |
| v = [c for c in visible if c in set_cols] | |
| return v or pick_existing_columns(cols, DEFAULT_VISIBLE_COLUMNS) | |
| def _do_search(df: pd.DataFrame, term: str) -> pd.DataFrame: | |
| if not term: | |
| return df | |
| term_l = term.strip().lower() | |
| if not term_l: | |
| return df | |
| mask = pd.Series(False, index=df.index) | |
| for c in df.columns: | |
| if df[c].dtype == "object": | |
| mask |= df[c].astype(str).str.lower().str.contains(term_l, na=False) | |
| return df[mask].copy() | |
| # ---- data fetchers ---- | |
def _fetch_dataset(dataset_key: str, days: int) -> Tuple[pd.DataFrame, float]:
    """Fetch *dataset_key* over the trailing *days* window via the shared client.

    The curated "leads_unpermitted" view has a dedicated client method; every
    other key goes through the generic last-N-days fetch. Returns the
    (frame, elapsed_seconds) pair exactly as reported by the client.
    """
    if dataset_key == "leads_unpermitted":
        return _client.fetch_leads_unpermitted(days=days)
    return _client.fetch_dataset_last_n_days(dataset_key, days)
| # ---- UI ---- | |
def create_app():
    """Assemble and return the Gradio Blocks application.

    Tab 1 ("Leads & Filings"): generic DOB dataset explorer with borough /
    filing-status / permit-type filters, free-text search, per-dataset
    visible-column preferences persisted to disk, paging, and CSV export.

    Tab 2 ("Stalled & Distressed Projects"): the DOB stalled-construction
    dataset with its own search, sort-by-staleness, paging, and export.
    """
    theme = Soft(
        primary_hue=colors.orange,  # the orange accents you liked
        neutral_hue=colors.gray,
    )
    with gr.Blocks(theme=theme, css=CUSTOM_CSS, title=HEADER_TITLE) as demo:
        gr.Markdown(f"# {HEADER_TITLE}\n\n{HEADER_SUB}")
        with gr.Tab("Leads & Filings"):
            with gr.Row():
                # Column 1: dataset picker, borough filter, reload/reset.
                with gr.Column(scale=1, min_width=280, elem_classes="controls-col"):
                    ds = gr.Dropdown(
                        label="Dataset",
                        choices=[label for _, label in DATASETS],
                        value=[label for key, label in DATASETS if key == "leads_unpermitted"][0],
                        allow_custom_value=False,
                        info="Default loads the last 90 days.",
                    )
                    with gr.Group():
                        gr.Markdown("**Borough** (MN/BK/QN only)")
                        b_mn = gr.Checkbox(value=True, label="MANHATTAN", interactive=True)
                        b_bk = gr.Checkbox(value=True, label="BROOKLYN", interactive=True)
                        b_qn = gr.Checkbox(value=True, label="QUEENS", interactive=True)
                    with gr.Row():
                        reload_btn = gr.Button("Reload", variant="primary")
                        reset_btn = gr.Button("Reset filters")
                # Column 2: filing-status substring filters (ORed together).
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    gr.Markdown("**Filing status** (contains)")
                    s_app = gr.Checkbox(value=True, label="APPROVED")
                    s_obj = gr.Checkbox(value=True, label="OBJECTIONS")
                    s_pen = gr.Checkbox(value=False, label="PENDING")
                    s_wdr = gr.Checkbox(value=False, label="WITHDRAWN")
                    s_dis = gr.Checkbox(value=False, label="DISAPPROVED")
                # Column 3: permit-type substring filters (ORed together).
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    gr.Markdown("**Permit type**")
                    p_gc = gr.Checkbox(value=True, label="GC (General Contractor)")
                    p_st = gr.Checkbox(value=True, label="ST (Special Trade)")
                    p_laa = gr.Checkbox(value=False, label="LAA")
                    p_pl = gr.Checkbox(value=False, label="PL")
                    p_el = gr.Checkbox(value=False, label="EL")
                    p_ot = gr.Checkbox(value=False, label="OT")
                # Column 4: search, sort, page size, column picker, export.
                with gr.Column(scale=1, min_width=300, elem_classes="controls-col"):
                    search_box = gr.Textbox(label="Search", placeholder="Free-text search across all columns…")
                    with gr.Group():
                        gr.Markdown("Sort by **filing_date**")
                        sort_desc = gr.Radio(label=None, choices=["Desc", "Asc"], value="Desc")
                    page_size = gr.Number(label="Rows / page", value=DEFAULT_PAGE_SIZE, precision=0)
                    cols_acc = gr.Accordion("Columns", open=False)
                    with cols_acc:
                        visible_cols = gr.Dropdown(label="Visible columns", multiselect=True, choices=[], value=[])
                    export_btn = gr.Button("Export CSV", variant="secondary")
            stats_md = gr.Markdown("_Nothing loaded yet_")
            df_out = gr.Dataframe(interactive=False, wrap=False, max_height=520)
            csv_file = gr.File(label="Download CSV", visible=False)
            # Server-side state: full fetch, post-filter frame, current
            # "load more" page index, and the dataset key that produced them.
            df_full_state = gr.State(pd.DataFrame())
            df_filtered_state = gr.State(pd.DataFrame())
            page_index_state = gr.State(0)
            source_key_state = gr.State("leads_unpermitted")

            def _dataset_key_from_label(label: str) -> str:
                """Reverse-map a dropdown label to its DATASETS key."""
                for k, v in DATASETS:
                    if v == label:
                        return k
                return "leads_unpermitted"

            def _collect_boroughs():
                """Boroughs currently ticked; all allowed boroughs when none.

                NOTE(review): this reads Checkbox.value, which in Gradio is
                the construction-time default rather than live UI state —
                confirm un-ticking a borough actually affects a reload.
                """
                keep = []
                if b_mn.value: keep.append("MANHATTAN")
                if b_bk.value: keep.append("BROOKLYN")
                if b_qn.value: keep.append("QUEENS")
                return keep or list(ALLOWED_BOROUGHS)

            def _initial_load(ds_label, rows_per_page, order):
                """Fetch the selected dataset, apply borough/status/permit
                filters, sort, and restore per-dataset column preferences.

                Returns the 8-tuple wired to [df_out, df_full_state,
                df_filtered_state, page_index_state, stats_md, visible_cols,
                source_key_state, csv_file].
                """
                key = _dataset_key_from_label(ds_label)
                df, secs = _fetch_dataset(key, DEFAULT_DAYS_WINDOW)
                allowed_now = _collect_boroughs()
                if "borough" in df.columns:
                    df = df[df["borough"].isin(allowed_now)].copy()
                # Filing status: keep rows whose status contains ANY checked
                # term (case-insensitive). Same Checkbox.value caveat as in
                # _collect_boroughs applies to every s_*/p_* read below.
                status_terms = []
                if s_app.value: status_terms.append("APPROVED")
                if s_obj.value: status_terms.append("OBJECTION")
                if s_pen.value: status_terms.append("PENDING")
                if s_wdr.value: status_terms.append("WITHDRAW")
                if s_dis.value: status_terms.append("DISAPPROVED")
                if status_terms and "filing_status" in df.columns:
                    pat = "|".join(status_terms)
                    df = df[df["filing_status"].astype(str).str.contains(pat, case=False, na=False)]
                # Permit type: same OR-of-substrings scheme.
                permit_terms = []
                if p_gc.value: permit_terms.append("GC")
                if p_st.value: permit_terms.append("ST")
                if p_laa.value: permit_terms.append("LAA")
                if p_pl.value: permit_terms.append("PL")
                if p_el.value: permit_terms.append("EL")
                if p_ot.value: permit_terms.append("OT")
                if permit_terms and "permit_type" in df.columns:
                    patp = "|".join(permit_terms)
                    df = df[df["permit_type"].astype(str).str.contains(patp, case=False, na=False)]
                asc = (order == "Asc")
                if "filing_date" in df.columns:
                    # mergesort = stable: preserves API order on date ties.
                    df = df.sort_values("filing_date", ascending=asc, kind="mergesort")
                cols_sorted = sorted(df.columns)
                prefs = _load_prefs()
                saved = prefs.get(key, None)
                visible = _sanitize_visible(saved or DEFAULT_VISIBLE_COLUMNS, cols_sorted)
                view = df[visible].head(int(rows_per_page))
                speed_indicator = "⚡" if secs < 5 else ("✅" if secs < 15 else "✓")
                stats = f"{speed_indicator} **{ds_label}** — Loaded **{len(df):,}** rows in {secs:.1f}s"
                if secs < 1: stats += " (cached)"
                return (
                    view,
                    df,
                    df,
                    0,
                    stats,
                    gr.update(choices=cols_sorted, value=visible),
                    key,
                    gr.update(visible=False, value=None),  # hide any stale CSV link
                )
            reload_btn.click(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )
            # Auto-populate this tab when the page first opens.
            demo.load(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )

            def _apply_filters(df_full, rows_per_page, search, order, visibles, ds_key):
                """Re-run search/sort on the cached full frame and persist
                the chosen visible columns for this dataset. Resets paging."""
                df = df_full.copy()
                df = _do_search(df, search)
                asc = (order == "Asc")
                if "filing_date" in df.columns:
                    df = df.sort_values("filing_date", ascending=asc, kind="mergesort")
                prefs = _load_prefs()
                prefs[ds_key] = visibles
                _save_prefs(prefs)
                vis = _sanitize_visible(visibles, list(df.columns))
                view = df[vis].head(int(rows_per_page))
                return view, df, 0
            apply_btn = gr.Button("Apply filter")
            apply_btn.click(
                _apply_filters,
                inputs=[df_full_state, page_size, search_box, sort_desc, visible_cols, source_key_state],
                outputs=[df_out, df_filtered_state, page_index_state],
            )

            def _more(df_filt, page_idx, rows_per_page, visibles):
                """Grow the visible window by one page; always re-slices
                from row 0 so previously shown rows stay on screen."""
                if df_filt is None or df_filt.empty:
                    return pd.DataFrame(), page_idx
                vis = _sanitize_visible(visibles, list(df_filt.columns))
                new_page = page_idx + 1
                start = 0
                end = int(rows_per_page) * (new_page + 1)
                return df_filt[vis].iloc[start:end], new_page
            load_more = gr.Button("Load more rows")
            load_more.click(
                _more,
                inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
                outputs=[df_out, page_index_state],
            )
            # "Reset filters" simply re-runs the initial load.
            reset_btn.click(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )

            def _export(df_current: pd.DataFrame):
                """Write the filtered frame to a timestamped CSV and reveal
                the download link; keeps the link hidden when empty.

                NOTE(review): the hard-coded /tmp path is POSIX-only —
                consider tempfile for portability.
                """
                if df_current is None or df_current.empty:
                    return gr.update(visible=False, value=None)
                bio = export_csv(df_current)
                bio.seek(0)
                ts = int(time.time())
                path = f"/tmp/buildscout_export_{ts}.csv"
                with open(path, "wb") as f:
                    f.write(bio.read())
                return gr.update(visible=True, value=path, label="Download CSV")
            export_btn.click(
                _export,
                inputs=[df_filtered_state],
                outputs=[csv_file],
            )
        # ============================== STALLED & DISTRESSED PROJECTS TAB ==============================
        with gr.Tab("Stalled & Distressed Projects"):
            gr.Markdown(
                "### Real-time scanner for stalled, frozen, or quietly dead construction sites\n"
                "Perfect for finding partial foundations, fenced holes, or projects you can restart or buy out."
            )
            with gr.Row():
                # Column 1: borough filter + reload/reset.
                with gr.Column(scale=1, min_width=280, elem_classes="controls-col"):
                    with gr.Group():
                        gr.Markdown("**Borough**")
                        st_mn = gr.Checkbox(value=True, label="MANHATTAN", interactive=True)
                        st_bk = gr.Checkbox(value=True, label="BROOKLYN", interactive=True)
                        st_qn = gr.Checkbox(value=True, label="QUEENS", interactive=True)
                    with gr.Row():
                        stalled_reload_btn = gr.Button("Reload", variant="primary")
                        stalled_reset_btn = gr.Button("Reset filters")
                # Column 2: search, staleness sort, page size, apply/export.
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    stalled_search = gr.Textbox(label="Search", placeholder="Free-text search across all columns…")
                    with gr.Group():
                        gr.Markdown("**Sort by days stalled**")
                        stalled_sort = gr.Radio(label=None, choices=["Desc (oldest first)", "Asc (newest first)"], value="Desc (oldest first)")
                    stalled_page_size = gr.Number(label="Rows / page", value=DEFAULT_PAGE_SIZE, precision=0)
                    stalled_apply_btn = gr.Button("Apply filter")
                    stalled_export_btn = gr.Button("Export CSV", variant="secondary")
                # Column 3: column picker.
                with gr.Column(scale=1, min_width=300, elem_classes="controls-col"):
                    stalled_cols_acc = gr.Accordion("Columns", open=False)
                    with stalled_cols_acc:
                        stalled_visible_cols = gr.Dropdown(label="Visible columns", multiselect=True, choices=[], value=[])
            stalled_status = gr.Markdown("Click Reload to load stalled projects data")
            stalled_table = gr.Dataframe(interactive=False, wrap=False, max_height=620)
            stalled_csv_file = gr.File(label="Download CSV", visible=False)
            stalled_more_btn = gr.Button("Load more rows")
            # State - stalled_full_state holds ALL data (18 months), stalled_filtered_state holds after search/filter
            stalled_full_state = gr.State(pd.DataFrame())
            stalled_filtered_state = gr.State(pd.DataFrame())
            stalled_page_state = gr.State(0)
            # Default columns for stalled data (based on actual API schema)
            STALLED_DEFAULT_COLS = [
                "full_address", "days_stalled", "borough",
                "bin", "house_number", "street_name", "community_board",
                "complaint_number", "complaint_date", "date_complaint_received",
                "dobrundate",
            ]

            def _load_stalled(rows_per_page, mn, bk, qn, sort_order):
                """Fetch the stalled-sites dataset, filter to the ticked
                boroughs, sort by staleness, and return the first page.

                NOTE(review): days=0 presumably tells the client to skip its
                trailing-window filter (the dataset is pre-filtered to 18
                months per the comments below) — confirm in SocrataClient.
                """
                t0 = time.time()
                df, _ = _client.fetch_dataset_last_n_days("stalled_official", days=0)
                secs = time.time() - t0
                if df.empty:
                    # 7-tuple mirrors the outputs list of the bindings below.
                    return (
                        pd.DataFrame(),
                        pd.DataFrame(),
                        pd.DataFrame(),
                        0,
                        "⚠️ No data returned",
                        gr.update(choices=[], value=[]),
                        gr.update(visible=False, value=None),
                    )
                # Borough filter (API already filtered to last 18 months)
                boroughs = []
                if mn: boroughs.append("MANHATTAN")
                if bk: boroughs.append("BROOKLYN")
                if qn: boroughs.append("QUEENS")
                if not boroughs:
                    boroughs = ["MANHATTAN", "BROOKLYN", "QUEENS"]
                if "borough" in df.columns:
                    df = df[df["borough"].isin(boroughs)].copy()
                # Sort by staleness (Desc = oldest/most stalled first = highest days, Asc = newest first = lowest days)
                ascending = "Asc" in sort_order
                if "days_stalled" in df.columns:
                    df = df.sort_values("days_stalled", ascending=ascending)
                # Column selection
                cols_sorted = sorted(df.columns)
                visible = [c for c in STALLED_DEFAULT_COLS if c in cols_sorted]
                if not visible:
                    visible = cols_sorted[:10]
                view = df[visible].head(int(rows_per_page))
                speed = "⚡" if secs < 2 else "✅"
                stats = f"{speed} **DOB Stalled Construction Sites** – Found **{len(df):,}** projects in {secs:.2f}s"
                return (
                    view,
                    df,  # full state - all 18 months of data
                    df,  # filtered state - same initially
                    0,
                    stats,
                    gr.update(choices=cols_sorted, value=visible),
                    gr.update(visible=False, value=None),
                )

            def _apply_stalled_filters(df_full, rows_per_page, search, visibles, mn, bk, qn, sort_order):
                """Re-filter/search/sort the cached full frame; resets paging."""
                if df_full is None or df_full.empty:
                    return pd.DataFrame(), pd.DataFrame(), 0
                df = df_full.copy()
                # Borough filter (applied to full dataset)
                boroughs = []
                if mn: boroughs.append("MANHATTAN")
                if bk: boroughs.append("BROOKLYN")
                if qn: boroughs.append("QUEENS")
                if not boroughs:
                    boroughs = ["MANHATTAN", "BROOKLYN", "QUEENS"]
                if "borough" in df.columns:
                    df = df[df["borough"].isin(boroughs)].copy()
                # Search (applied to full dataset)
                df = _do_search(df, search)
                # Sort by staleness
                ascending = "Asc" in sort_order
                if "days_stalled" in df.columns:
                    df = df.sort_values("days_stalled", ascending=ascending)
                vis = _sanitize_visible(visibles, list(df.columns))
                view = df[vis].head(int(rows_per_page))
                return view, df, 0

            def _stalled_more(df_filt, page_idx, rows_per_page, visibles):
                """Extend the visible window by one page (re-slices from 0)."""
                if df_filt is None or df_filt.empty:
                    return pd.DataFrame(), page_idx
                vis = _sanitize_visible(visibles, list(df_filt.columns))
                new_page = page_idx + 1
                end = int(rows_per_page) * (new_page + 1)
                return df_filt[vis].iloc[:end], new_page

            def _stalled_export(df_current: pd.DataFrame):
                """CSV export of the filtered stalled frame; same /tmp
                portability caveat as the other tab's export."""
                if df_current is None or df_current.empty:
                    return gr.update(visible=False, value=None)
                bio = export_csv(df_current)
                bio.seek(0)
                ts = int(time.time())
                path = f"/tmp/stalled_export_{ts}.csv"
                with open(path, "wb") as f:
                    f.write(bio.read())
                return gr.update(visible=True, value=path, label="Download CSV")
            # Event bindings
            stalled_reload_btn.click(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            stalled_reset_btn.click(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            # Auto-load the stalled data on page open as well.
            demo.load(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            stalled_apply_btn.click(
                _apply_stalled_filters,
                inputs=[stalled_full_state, stalled_page_size, stalled_search, stalled_visible_cols, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_filtered_state, stalled_page_state],
            )
            stalled_more_btn.click(
                _stalled_more,
                inputs=[stalled_filtered_state, stalled_page_state, stalled_page_size, stalled_visible_cols],
                outputs=[stalled_table, stalled_page_state],
            )
            stalled_export_btn.click(
                _stalled_export,
                inputs=[stalled_filtered_state],
                outputs=[stalled_csv_file],
            )
        gr.Markdown(
            f"*{APP_NAME} {APP_VERSION}* · Loads last **{DEFAULT_DAYS_WINDOW}** days. "
            "Set **SOCRATA_APP_TOKEN** for higher API limits. Data is cached for performance."
        )
    return demo