# buildscout / gui.py
# (branch: Chest Pain, commit ee15e2d)
# v1.9: new DOB Stalled tab - paginate full 1.4M dataset, dedupe by complaint_number, 18-month filter on complaint date
# gui.py - YOUR ORIGINAL FILE + STALLED TAB FIXED - 402 LINES EXACT
from __future__ import annotations
import os
import io
import json
import base64
import time
from typing import Optional, Tuple
import pandas as pd
import gradio as gr
from gradio.themes import Soft, colors
from config import (
APP_NAME,
APP_VERSION,
DEFAULT_PAGE_SIZE,
DEFAULT_DAYS_WINDOW,
DEFAULT_VISIBLE_COLUMNS,
DATASETS,
ALLOWED_BOROUGHS,
)
from services.data import SocrataClient
from utils import pick_existing_columns, export_csv
# ---- UI density (comfortable / compact via env) ----
# Any value other than "compact" falls back to the comfortable layout.
UI_DENSITY = os.getenv("UI_DENSITY", "comfortable").strip().lower()

# ---- App header ----
HEADER_TITLE = f"{APP_NAME} v{APP_VERSION}"
HEADER_SUB = "NYC DOB sales-leads explorer"

# ---- Persist visible columns per dataset on disk ----
# JSON file in the user's home dir; maps dataset key -> list of visible column names.
PREFS_PATH = os.path.join(os.path.expanduser("~"), ".buildscout_prefs.json")
def _load_prefs() -> dict:
    """Read saved column preferences from PREFS_PATH.

    Best-effort: a missing, unreadable, or corrupt prefs file simply
    yields an empty dict so the UI falls back to its defaults.
    """
    try:
        with open(PREFS_PATH, encoding="utf-8") as fh:
            raw = fh.read()
        return json.loads(raw)
    except Exception:
        return {}
def _save_prefs(prefs: dict) -> None:
    """Persist column preferences to PREFS_PATH, ignoring any write failure.

    Prefs are cosmetic, so a read-only home dir or full disk must never
    crash the app — errors are deliberately swallowed.
    """
    try:
        payload = json.dumps(prefs, indent=2, sort_keys=True)
        with open(PREFS_PATH, "w", encoding="utf-8") as fh:
            fh.write(payload)
    except Exception:
        pass
# Shared API client; module-level so any internal caching persists across requests.
_client = SocrataClient()
# ---------- Typography (Aptos if present, otherwise Helvetica stack) ----------
def _read_font_file_bytes() -> Optional[bytes]:
env_path = os.getenv("APTOS_WOFF2", "").strip()
candidates = [env_path] if env_path else []
candidates += [
"aptos.woff2",
"assets/aptos.woff2",
"assets/fonts/aptos.woff2",
"static/aptos.woff2",
"static/fonts/aptos.woff2",
]
for p in candidates:
if p and os.path.exists(p) and os.path.isfile(p):
try:
with open(p, "rb") as f:
return f.read()
except Exception:
pass
return None
def _build_font_css() -> str:
    """Build the app-wide CSS string: an optional embedded Aptos @font-face
    (base64 data URI) plus typography and density rules for the UI."""
    embedded = _read_font_file_bytes()
    font_face = ""
    family = "'Helvetica Neue', Helvetica, Arial, sans-serif"
    if embedded:
        # Inline the font as a data URI so no extra static route is needed.
        encoded = base64.b64encode(embedded).decode("ascii")
        font_face = f"""
@font-face {{
  font-family: 'Aptos';
  src: url(data:font/woff2;base64,{encoded}) format('woff2');
  font-weight: 300 900;
  font-style: normal;
  font-display: swap;
}}
"""
        family = "'Aptos', 'Helvetica Neue', Helvetica, Arial, sans-serif"
    # Density knobs: font size, line height, and table-cell padding.
    if UI_DENSITY == "compact":
        base_size, line_h, cell_py, cell_px = "14.2px", "1.28", "6px", "10px"
    else:
        # "comfortable" (the default for any unrecognized value)
        base_size, line_h, cell_py, cell_px = "15.6px", "1.38", "9px", "14px"
    rules = f"""
:root, body, input, button, textarea, select, .gradio-container {{
  font-family: {family} !important;
  font-size: {base_size};
  line-height: {line_h};
  letter-spacing: 0.01em;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}}
label, .label, .wrap > label, .wrap .label {{ font-weight: 600; }}
th, thead tr {{ font-weight: 700; }}
/* Gradio dataframe tweaks */
.dataframe table {{
  border-collapse: separate;
  border-spacing: 0;
}}
.dataframe table th,
.dataframe table td {{
  padding: {cell_py} {cell_px};
  white-space: nowrap;
}}
/* keep zebra readable on dark */
.dataframe table tbody tr:nth-child(even) td {{
  background: color-mix(in oklab, var(--neutral-800), white 4%);
}}
button.primary, .gr-button.primary {{ font-weight: 700; }}
/* Make the top controls breathe but align nicely */
.controls-col {{
  display: flex;
  flex-direction: column;
  gap: 10px;
}}
"""
    return font_face + rules
# Built once at import time and passed to gr.Blocks(css=...).
CUSTOM_CSS = _build_font_css()
# ---- helpers ----
def _sanitize_visible(visible: list[str], cols: list[str]) -> list[str]:
set_cols = set(cols)
v = [c for c in visible if c in set_cols]
return v or pick_existing_columns(cols, DEFAULT_VISIBLE_COLUMNS)
def _do_search(df: pd.DataFrame, term: str) -> pd.DataFrame:
if not term:
return df
term_l = term.strip().lower()
if not term_l:
return df
mask = pd.Series(False, index=df.index)
for c in df.columns:
if df[c].dtype == "object":
mask |= df[c].astype(str).str.lower().str.contains(term_l, na=False)
return df[mask].copy()
# ---- data fetchers ----
def _fetch_dataset(dataset_key: str, days: int) -> Tuple[pd.DataFrame, float]:
    """Fetch *dataset_key* for the last *days* days via the shared client.

    Returns (frame, elapsed_seconds). The unpermitted-leads view has its
    own dedicated fetch path; everything else goes through the generic one.
    """
    if dataset_key == "leads_unpermitted":
        return _client.fetch_leads_unpermitted(days=days)
    return _client.fetch_dataset_last_n_days(dataset_key, days)
# ---- UI ----
def create_app():
    """Assemble and return the Gradio Blocks app.

    Two tabs share the same pattern: fetch -> filter -> sort -> paginate.
    The full fetch and the post-filter frame are kept in gr.State objects,
    and "Load more" grows the visible slice rather than re-querying.
    """
    theme = Soft(
        primary_hue=colors.orange,  # the orange accents you liked
        neutral_hue=colors.gray,
    )
    with gr.Blocks(theme=theme, css=CUSTOM_CSS, title=HEADER_TITLE) as demo:
        gr.Markdown(f"# {HEADER_TITLE}\n\n{HEADER_SUB}")

        # ============================== LEADS & FILINGS TAB ==============================
        with gr.Tab("Leads & Filings"):
            with gr.Row():
                with gr.Column(scale=1, min_width=280, elem_classes="controls-col"):
                    ds = gr.Dropdown(
                        label="Dataset",
                        choices=[label for _, label in DATASETS],
                        value=[label for key, label in DATASETS if key == "leads_unpermitted"][0],
                        allow_custom_value=False,
                        info="Default loads the last 90 days.",
                    )
                    with gr.Group():
                        gr.Markdown("**Borough** (MN/BK/QN only)")
                        b_mn = gr.Checkbox(value=True, label="MANHATTAN", interactive=True)
                        b_bk = gr.Checkbox(value=True, label="BROOKLYN", interactive=True)
                        b_qn = gr.Checkbox(value=True, label="QUEENS", interactive=True)
                    with gr.Row():
                        reload_btn = gr.Button("Reload", variant="primary")
                        reset_btn = gr.Button("Reset filters")
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    gr.Markdown("**Filing status** (contains)")
                    s_app = gr.Checkbox(value=True, label="APPROVED")
                    s_obj = gr.Checkbox(value=True, label="OBJECTIONS")
                    s_pen = gr.Checkbox(value=False, label="PENDING")
                    s_wdr = gr.Checkbox(value=False, label="WITHDRAWN")
                    s_dis = gr.Checkbox(value=False, label="DISAPPROVED")
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    gr.Markdown("**Permit type**")
                    p_gc = gr.Checkbox(value=True, label="GC (General Contractor)")
                    p_st = gr.Checkbox(value=True, label="ST (Special Trade)")
                    p_laa = gr.Checkbox(value=False, label="LAA")
                    p_pl = gr.Checkbox(value=False, label="PL")
                    p_el = gr.Checkbox(value=False, label="EL")
                    p_ot = gr.Checkbox(value=False, label="OT")
                with gr.Column(scale=1, min_width=300, elem_classes="controls-col"):
                    search_box = gr.Textbox(label="Search", placeholder="Free-text search across all columns…")
                    with gr.Group():
                        gr.Markdown("Sort by **filing_date**")
                        sort_desc = gr.Radio(label=None, choices=["Desc", "Asc"], value="Desc")
                    page_size = gr.Number(label="Rows / page", value=DEFAULT_PAGE_SIZE, precision=0)
                    cols_acc = gr.Accordion("Columns", open=False)
                    with cols_acc:
                        visible_cols = gr.Dropdown(label="Visible columns", multiselect=True, choices=[], value=[])
                    export_btn = gr.Button("Export CSV", variant="secondary")
            stats_md = gr.Markdown("_Nothing loaded yet_")
            df_out = gr.Dataframe(interactive=False, wrap=False, max_height=520)
            csv_file = gr.File(label="Download CSV", visible=False)
            # Server-side state: full fetch, filtered view, pagination index, dataset key.
            df_full_state = gr.State(pd.DataFrame())
            df_filtered_state = gr.State(pd.DataFrame())
            page_index_state = gr.State(0)
            source_key_state = gr.State("leads_unpermitted")

            def _dataset_key_from_label(label: str) -> str:
                """Map a dropdown label back to its dataset key (default: leads_unpermitted)."""
                for k, v in DATASETS:
                    if v == label:
                        return k
                return "leads_unpermitted"

            def _collect_boroughs():
                """Return the checked borough names, or all allowed boroughs if none checked.

                NOTE(review): this reads each checkbox's construction-time ``.value``,
                not the live UI state — Gradio only delivers live values to handlers
                via ``inputs=``. Confirm whether these checkboxes should instead be
                wired into the event's input list.
                """
                keep = []
                if b_mn.value: keep.append("MANHATTAN")
                if b_bk.value: keep.append("BROOKLYN")
                if b_qn.value: keep.append("QUEENS")
                return keep or list(ALLOWED_BOROUGHS)

            def _initial_load(ds_label, rows_per_page, order):
                """Fetch the selected dataset, apply borough/status/permit filters,
                sort by filing_date, and return the first page plus refreshed state.

                Returns 8 values matching the outputs list of reload/reset/load:
                (table view, full df, filtered df, page index, stats markdown,
                visible-columns update, dataset key, csv-file update).
                """
                key = _dataset_key_from_label(ds_label)
                df, secs = _fetch_dataset(key, DEFAULT_DAYS_WINDOW)
                allowed_now = _collect_boroughs()
                if "borough" in df.columns:
                    df = df[df["borough"].isin(allowed_now)].copy()
                # NOTE(review): like _collect_boroughs, the s_* / p_* reads below use
                # the checkboxes' initial .value, not the user's current selections —
                # verify whether they need to be event inputs.
                status_terms = []
                if s_app.value: status_terms.append("APPROVED")
                if s_obj.value: status_terms.append("OBJECTION")
                if s_pen.value: status_terms.append("PENDING")
                if s_wdr.value: status_terms.append("WITHDRAW")
                if s_dis.value: status_terms.append("DISAPPROVED")
                if status_terms and "filing_status" in df.columns:
                    # Substring (contains) match, so e.g. "OBJECTION" also hits "OBJECTIONS".
                    pat = "|".join(status_terms)
                    df = df[df["filing_status"].astype(str).str.contains(pat, case=False, na=False)]
                permit_terms = []
                if p_gc.value: permit_terms.append("GC")
                if p_st.value: permit_terms.append("ST")
                if p_laa.value: permit_terms.append("LAA")
                if p_pl.value: permit_terms.append("PL")
                if p_el.value: permit_terms.append("EL")
                if p_ot.value: permit_terms.append("OT")
                if permit_terms and "permit_type" in df.columns:
                    patp = "|".join(permit_terms)
                    df = df[df["permit_type"].astype(str).str.contains(patp, case=False, na=False)]
                asc = (order == "Asc")
                if "filing_date" in df.columns:
                    # mergesort = stable sort, keeps relative order of equal dates.
                    df = df.sort_values("filing_date", ascending=asc, kind="mergesort")
                cols_sorted = sorted(df.columns)
                # Restore per-dataset column preferences saved on disk, if any.
                prefs = _load_prefs()
                saved = prefs.get(key, None)
                visible = _sanitize_visible(saved or DEFAULT_VISIBLE_COLUMNS, cols_sorted)
                view = df[visible].head(int(rows_per_page))
                speed_indicator = "⚡" if secs < 5 else ("✅" if secs < 15 else "✓")
                stats = f"{speed_indicator} **{ds_label}** — Loaded **{len(df):,}** rows in {secs:.1f}s"
                if secs < 1: stats += " (cached)"
                return (
                    view,
                    df,
                    df,
                    0,
                    stats,
                    gr.update(choices=cols_sorted, value=visible),
                    key,
                    gr.update(visible=False, value=None),  # hide any stale CSV download
                )

            reload_btn.click(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )
            # Auto-populate the tab when the page first loads.
            demo.load(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )

            def _apply_filters(df_full, rows_per_page, search, order, visibles, ds_key):
                """Re-filter the already-fetched frame (search + sort), persist the
                user's column selection for this dataset, and reset to page 0."""
                df = df_full.copy()
                df = _do_search(df, search)
                asc = (order == "Asc")
                if "filing_date" in df.columns:
                    df = df.sort_values("filing_date", ascending=asc, kind="mergesort")
                # Persist the visible-columns choice so it survives restarts.
                prefs = _load_prefs()
                prefs[ds_key] = visibles
                _save_prefs(prefs)
                vis = _sanitize_visible(visibles, list(df.columns))
                view = df[vis].head(int(rows_per_page))
                return view, df, 0

            apply_btn = gr.Button("Apply filter")
            apply_btn.click(
                _apply_filters,
                inputs=[df_full_state, page_size, search_box, sort_desc, visible_cols, source_key_state],
                outputs=[df_out, df_filtered_state, page_index_state],
            )

            def _more(df_filt, page_idx, rows_per_page, visibles):
                """Grow the visible slice by one page: shows rows 0..(page+1)*page_size."""
                if df_filt is None or df_filt.empty:
                    return pd.DataFrame(), page_idx
                vis = _sanitize_visible(visibles, list(df_filt.columns))
                new_page = page_idx + 1
                start = 0
                end = int(rows_per_page) * (new_page + 1)
                return df_filt[vis].iloc[start:end], new_page

            load_more = gr.Button("Load more rows")
            load_more.click(
                _more,
                inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
                outputs=[df_out, page_index_state],
            )
            # "Reset filters" simply re-runs the initial load with current dropdown values.
            reset_btn.click(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )

            def _export(df_current: pd.DataFrame):
                """Write the filtered frame to a timestamped CSV and reveal the download.

                NOTE(review): the hard-coded /tmp path is POSIX-only — consider
                tempfile.gettempdir() for portability.
                """
                if df_current is None or df_current.empty:
                    return gr.update(visible=False, value=None)
                bio = export_csv(df_current)
                bio.seek(0)
                ts = int(time.time())
                path = f"/tmp/buildscout_export_{ts}.csv"
                with open(path, "wb") as f:
                    f.write(bio.read())
                return gr.update(visible=True, value=path, label="Download CSV")

            export_btn.click(
                _export,
                inputs=[df_filtered_state],
                outputs=[csv_file],
            )

        # ============================== STALLED & DISTRESSED PROJECTS TAB ==============================
        with gr.Tab("Stalled & Distressed Projects"):
            gr.Markdown(
                "### Real-time scanner for stalled, frozen, or quietly dead construction sites\n"
                "Perfect for finding partial foundations, fenced holes, or projects you can restart or buy out."
            )
            with gr.Row():
                with gr.Column(scale=1, min_width=280, elem_classes="controls-col"):
                    with gr.Group():
                        gr.Markdown("**Borough**")
                        st_mn = gr.Checkbox(value=True, label="MANHATTAN", interactive=True)
                        st_bk = gr.Checkbox(value=True, label="BROOKLYN", interactive=True)
                        st_qn = gr.Checkbox(value=True, label="QUEENS", interactive=True)
                    with gr.Row():
                        stalled_reload_btn = gr.Button("Reload", variant="primary")
                        stalled_reset_btn = gr.Button("Reset filters")
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    stalled_search = gr.Textbox(label="Search", placeholder="Free-text search across all columns…")
                    with gr.Group():
                        gr.Markdown("**Sort by days stalled**")
                        stalled_sort = gr.Radio(label=None, choices=["Desc (oldest first)", "Asc (newest first)"], value="Desc (oldest first)")
                    stalled_page_size = gr.Number(label="Rows / page", value=DEFAULT_PAGE_SIZE, precision=0)
                    stalled_apply_btn = gr.Button("Apply filter")
                    stalled_export_btn = gr.Button("Export CSV", variant="secondary")
                with gr.Column(scale=1, min_width=300, elem_classes="controls-col"):
                    stalled_cols_acc = gr.Accordion("Columns", open=False)
                    with stalled_cols_acc:
                        stalled_visible_cols = gr.Dropdown(label="Visible columns", multiselect=True, choices=[], value=[])
            stalled_status = gr.Markdown("Click Reload to load stalled projects data")
            stalled_table = gr.Dataframe(interactive=False, wrap=False, max_height=620)
            stalled_csv_file = gr.File(label="Download CSV", visible=False)
            stalled_more_btn = gr.Button("Load more rows")
            # State - stalled_full_state holds ALL data (18 months), stalled_filtered_state holds after search/filter
            stalled_full_state = gr.State(pd.DataFrame())
            stalled_filtered_state = gr.State(pd.DataFrame())
            stalled_page_state = gr.State(0)
            # Default columns for stalled data (based on actual API schema)
            STALLED_DEFAULT_COLS = [
                "full_address", "days_stalled", "borough",
                "bin", "house_number", "street_name", "community_board",
                "complaint_number", "complaint_date", "date_complaint_received",
                "dobrundate",
            ]

            def _load_stalled(rows_per_page, mn, bk, qn, sort_order):
                """Fetch the stalled-sites dataset, filter by borough, sort by staleness,
                and return the first page plus refreshed state.

                NOTE(review): days=0 is presumably a "no date window / full dataset"
                sentinel in SocrataClient.fetch_dataset_last_n_days — confirm.
                Returns 7 values matching the reload/reset/load outputs list.
                """
                t0 = time.time()
                df, _ = _client.fetch_dataset_last_n_days("stalled_official", days=0)
                secs = time.time() - t0
                if df.empty:
                    # Empty-result shape still has to match the 7 outputs.
                    return (
                        pd.DataFrame(),
                        pd.DataFrame(),
                        pd.DataFrame(),
                        0,
                        "⚠️ No data returned",
                        gr.update(choices=[], value=[]),
                        gr.update(visible=False, value=None),
                    )
                # Borough filter (API already filtered to last 18 months)
                boroughs = []
                if mn: boroughs.append("MANHATTAN")
                if bk: boroughs.append("BROOKLYN")
                if qn: boroughs.append("QUEENS")
                if not boroughs:
                    # Nothing checked means "no restriction" rather than "no rows".
                    boroughs = ["MANHATTAN", "BROOKLYN", "QUEENS"]
                if "borough" in df.columns:
                    df = df[df["borough"].isin(boroughs)].copy()
                # Sort by staleness (Desc = oldest/most stalled first = highest days, Asc = newest first = lowest days)
                ascending = "Asc" in sort_order
                if "days_stalled" in df.columns:
                    df = df.sort_values("days_stalled", ascending=ascending)
                # Column selection
                cols_sorted = sorted(df.columns)
                visible = [c for c in STALLED_DEFAULT_COLS if c in cols_sorted]
                if not visible:
                    # Schema drifted from STALLED_DEFAULT_COLS — show something sensible.
                    visible = cols_sorted[:10]
                view = df[visible].head(int(rows_per_page))
                speed = "⚡" if secs < 2 else "✅"
                stats = f"{speed} **DOB Stalled Construction Sites** – Found **{len(df):,}** projects in {secs:.2f}s"
                return (
                    view,
                    df,  # full state - all 18 months of data
                    df,  # filtered state - same initially
                    0,
                    stats,
                    gr.update(choices=cols_sorted, value=visible),
                    gr.update(visible=False, value=None),
                )

            def _apply_stalled_filters(df_full, rows_per_page, search, visibles, mn, bk, qn, sort_order):
                """Re-filter the cached stalled data (borough + search + sort) and reset paging.

                Unlike the leads tab, the borough checkboxes here ARE passed as live
                inputs, so mn/bk/qn reflect the current UI state.
                """
                if df_full is None or df_full.empty:
                    return pd.DataFrame(), pd.DataFrame(), 0
                df = df_full.copy()
                # Borough filter (applied to full dataset)
                boroughs = []
                if mn: boroughs.append("MANHATTAN")
                if bk: boroughs.append("BROOKLYN")
                if qn: boroughs.append("QUEENS")
                if not boroughs:
                    boroughs = ["MANHATTAN", "BROOKLYN", "QUEENS"]
                if "borough" in df.columns:
                    df = df[df["borough"].isin(boroughs)].copy()
                # Search (applied to full dataset)
                df = _do_search(df, search)
                # Sort by staleness
                ascending = "Asc" in sort_order
                if "days_stalled" in df.columns:
                    df = df.sort_values("days_stalled", ascending=ascending)
                vis = _sanitize_visible(visibles, list(df.columns))
                view = df[vis].head(int(rows_per_page))
                return view, df, 0

            def _stalled_more(df_filt, page_idx, rows_per_page, visibles):
                """Grow the visible slice by one page (rows 0..(page+1)*page_size)."""
                if df_filt is None or df_filt.empty:
                    return pd.DataFrame(), page_idx
                vis = _sanitize_visible(visibles, list(df_filt.columns))
                new_page = page_idx + 1
                end = int(rows_per_page) * (new_page + 1)
                return df_filt[vis].iloc[:end], new_page

            def _stalled_export(df_current: pd.DataFrame):
                """Write the filtered stalled data to a timestamped CSV for download.

                NOTE(review): same /tmp portability caveat as the leads-tab export.
                """
                if df_current is None or df_current.empty:
                    return gr.update(visible=False, value=None)
                bio = export_csv(df_current)
                bio.seek(0)
                ts = int(time.time())
                path = f"/tmp/stalled_export_{ts}.csv"
                with open(path, "wb") as f:
                    f.write(bio.read())
                return gr.update(visible=True, value=path, label="Download CSV")

            # Event bindings
            stalled_reload_btn.click(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            stalled_reset_btn.click(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            # Second demo.load: both tabs populate on page load (this one fetches the
            # full stalled dataset up front).
            demo.load(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            stalled_apply_btn.click(
                _apply_stalled_filters,
                inputs=[stalled_full_state, stalled_page_size, stalled_search, stalled_visible_cols, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_filtered_state, stalled_page_state],
            )
            stalled_more_btn.click(
                _stalled_more,
                inputs=[stalled_filtered_state, stalled_page_state, stalled_page_size, stalled_visible_cols],
                outputs=[stalled_table, stalled_page_state],
            )
            stalled_export_btn.click(
                _stalled_export,
                inputs=[stalled_filtered_state],
                outputs=[stalled_csv_file],
            )

        # Footer shown under both tabs.
        gr.Markdown(
            f"*{APP_NAME} {APP_VERSION}* · Loads last **{DEFAULT_DAYS_WINDOW}** days. "
            "Set **SOCRATA_APP_TOKEN** for higher API limits. Data is cached for performance."
        )
    return demo