Spaces:
Running
Running
Chest Pain
v1.9: new DOB Stalled tab - paginate full 1.4M dataset, dedupe by complaint_number, 18-month filter on complaint date
ee15e2d
| # gui.py - YOUR ORIGINAL FILE + STALLED TAB FIXED - 402 LINES EXACT | |
| from __future__ import annotations | |
| import os | |
| import io | |
| import json | |
| import base64 | |
| import time | |
| from typing import Optional, Tuple | |
| import pandas as pd | |
| import gradio as gr | |
| from gradio.themes import Soft, colors | |
| from config import ( | |
| APP_NAME, | |
| APP_VERSION, | |
| DEFAULT_PAGE_SIZE, | |
| DEFAULT_DAYS_WINDOW, | |
| DEFAULT_VISIBLE_COLUMNS, | |
| DATASETS, | |
| ALLOWED_BOROUGHS, | |
| ) | |
| from services.data import SocrataClient | |
| from utils import pick_existing_columns, export_csv | |
# ---- UI density (comfortable / compact via env) ----
# "compact" tightens font size, line height and table-cell padding;
# any other value falls back to the comfortable defaults (see _build_font_css).
UI_DENSITY = os.getenv("UI_DENSITY", "comfortable").strip().lower()
# ---- App header ----
HEADER_TITLE = f"{APP_NAME} v{APP_VERSION}"
HEADER_SUB = "NYC DOB sales-leads explorer"
# ---- Persist visible columns per dataset on disk ----
# JSON file in the user's home dir, mapping dataset key -> list of visible columns.
PREFS_PATH = os.path.join(os.path.expanduser("~"), ".buildscout_prefs.json")
def _load_prefs() -> dict:
    """Load the per-dataset column preferences from PREFS_PATH.

    Returns an empty dict when the file is missing, unreadable, or does not
    contain a JSON object. The shape check matters: callers index the result
    with dataset keys (``prefs[key] = ...``), so a prefs file that was
    corrupted into, say, a JSON list must not be propagated as-is.
    """
    try:
        with open(PREFS_PATH, "r", encoding="utf-8") as f:
            prefs = json.load(f)
    except Exception:
        # Best-effort: prefs are a convenience, never a hard requirement.
        return {}
    # Valid JSON but wrong top-level type -> treat as "no prefs".
    return prefs if isinstance(prefs, dict) else {}
def _save_prefs(prefs: dict) -> None:
    """Persist *prefs* to PREFS_PATH as indented, key-sorted JSON.

    Failures (read-only home dir, unserializable content, ...) are silently
    swallowed: preferences are a convenience, not critical state.
    """
    try:
        serialized = json.dumps(prefs, indent=2, sort_keys=True)
        with open(PREFS_PATH, "w", encoding="utf-8") as fh:
            fh.write(serialized)
    except Exception:
        pass
# Single shared Socrata API client, reused by every UI callback below.
_client = SocrataClient()
| # ---------- Typography (Aptos if present, otherwise Helvetica stack) ---------- | |
| def _read_font_file_bytes() -> Optional[bytes]: | |
| env_path = os.getenv("APTOS_WOFF2", "").strip() | |
| candidates = [env_path] if env_path else [] | |
| candidates += [ | |
| "aptos.woff2", | |
| "assets/aptos.woff2", | |
| "assets/fonts/aptos.woff2", | |
| "static/aptos.woff2", | |
| "static/fonts/aptos.woff2", | |
| ] | |
| for p in candidates: | |
| if p and os.path.exists(p) and os.path.isfile(p): | |
| try: | |
| with open(p, "rb") as f: | |
| return f.read() | |
| except Exception: | |
| pass | |
| return None | |
def _build_font_css() -> str:
    """Build the app-wide CSS string injected into gr.Blocks(css=...).

    Consists of an optional @font-face rule (when an Aptos woff2 file is
    found, embedded inline as a base64 data: URI so no static hosting is
    needed), followed by typography and Gradio-dataframe rules whose sizes
    depend on the UI_DENSITY module setting.
    """
    font_bytes = _read_font_file_bytes()
    font_face = ""
    # Fallback stack used when no Aptos font file is available.
    family = "'Helvetica Neue', Helvetica, Arial, sans-serif"
    if font_bytes:
        b64 = base64.b64encode(font_bytes).decode("ascii")
        font_face = f"""
@font-face {{
    font-family: 'Aptos';
    src: url(data:font/woff2;base64,{b64}) format('woff2');
    font-weight: 300 900;
    font-style: normal;
    font-display: swap;
}}
"""
        # Prefer Aptos, keep the Helvetica stack as fallback.
        family = "'Aptos', 'Helvetica Neue', Helvetica, Arial, sans-serif"
    # Density knobs: base font size, line height, and table-cell padding.
    if UI_DENSITY == "compact":
        base_size = "14.2px"
        line_h = "1.28"
        cell_py = "6px"
        cell_px = "10px"
    else:  # comfortable
        base_size = "15.6px"
        line_h = "1.38"
        cell_py = "9px"
        cell_px = "14px"
    return font_face + f"""
:root, body, input, button, textarea, select, .gradio-container {{
    font-family: {family} !important;
    font-size: {base_size};
    line-height: {line_h};
    letter-spacing: 0.01em;
    -webkit-font-smoothing: antialiased;
    -moz-osx-font-smoothing: grayscale;
}}
label, .label, .wrap > label, .wrap .label {{ font-weight: 600; }}
th, thead tr {{ font-weight: 700; }}
/* Gradio dataframe tweaks */
.dataframe table {{
    border-collapse: separate;
    border-spacing: 0;
}}
.dataframe table th,
.dataframe table td {{
    padding: {cell_py} {cell_px};
    white-space: nowrap;
}}
/* keep zebra readable on dark */
.dataframe table tbody tr:nth-child(even) td {{
    background: color-mix(in oklab, var(--neutral-800), white 4%);
}}
button.primary, .gr-button.primary {{ font-weight: 700; }}
/* Make the top controls breathe but align nicely */
.controls-col {{
    display: flex;
    flex-direction: column;
    gap: 10px;
}}
"""
# Built once at import time; passed to gr.Blocks(css=...) in create_app.
CUSTOM_CSS = _build_font_css()
| # ---- helpers ---- | |
| def _sanitize_visible(visible: list[str], cols: list[str]) -> list[str]: | |
| set_cols = set(cols) | |
| v = [c for c in visible if c in set_cols] | |
| return v or pick_existing_columns(cols, DEFAULT_VISIBLE_COLUMNS) | |
| def _do_search(df: pd.DataFrame, term: str) -> pd.DataFrame: | |
| if not term: | |
| return df | |
| term_l = term.strip().lower() | |
| if not term_l: | |
| return df | |
| mask = pd.Series(False, index=df.index) | |
| for c in df.columns: | |
| if df[c].dtype == "object": | |
| mask |= df[c].astype(str).str.lower().str.contains(term_l, na=False) | |
| return df[mask].copy() | |
| # ---- data fetchers ---- | |
def _fetch_dataset(dataset_key: str, days: int) -> Tuple[pd.DataFrame, float]:
    """Fetch *dataset_key* over the trailing *days* window via the shared client.

    The curated "leads_unpermitted" view has a dedicated client method; every
    other key goes through the generic last-N-days fetch. Returns the
    (frame, elapsed_seconds) pair exactly as reported by the client.
    """
    if dataset_key == "leads_unpermitted":
        return _client.fetch_leads_unpermitted(days=days)
    return _client.fetch_dataset_last_n_days(dataset_key, days)
| # ---- UI ---- | |
def create_app():
    """Assemble and return the Gradio Blocks application.

    Tab 1 ("Leads & Filings"): generic DOB dataset explorer with borough /
    filing-status / permit-type filters, free-text search, per-dataset
    visible-column preferences persisted to disk, paging, and CSV export.

    Tab 2 ("Stalled & Distressed Projects"): the DOB stalled-construction
    dataset with its own search, sort-by-staleness, paging, and export.
    """
    theme = Soft(
        primary_hue=colors.orange,  # the orange accents you liked
        neutral_hue=colors.gray,
    )
    with gr.Blocks(theme=theme, css=CUSTOM_CSS, title=HEADER_TITLE) as demo:
        gr.Markdown(f"# {HEADER_TITLE}\n\n{HEADER_SUB}")
        with gr.Tab("Leads & Filings"):
            with gr.Row():
                # Column 1: dataset picker, borough filter, reload/reset.
                with gr.Column(scale=1, min_width=280, elem_classes="controls-col"):
                    ds = gr.Dropdown(
                        label="Dataset",
                        choices=[label for _, label in DATASETS],
                        value=[label for key, label in DATASETS if key == "leads_unpermitted"][0],
                        allow_custom_value=False,
                        info="Default loads the last 90 days.",
                    )
                    with gr.Group():
                        gr.Markdown("**Borough** (MN/BK/QN only)")
                        b_mn = gr.Checkbox(value=True, label="MANHATTAN", interactive=True)
                        b_bk = gr.Checkbox(value=True, label="BROOKLYN", interactive=True)
                        b_qn = gr.Checkbox(value=True, label="QUEENS", interactive=True)
                    with gr.Row():
                        reload_btn = gr.Button("Reload", variant="primary")
                        reset_btn = gr.Button("Reset filters")
                # Column 2: filing-status substring filters (ORed together).
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    gr.Markdown("**Filing status** (contains)")
                    s_app = gr.Checkbox(value=True, label="APPROVED")
                    s_obj = gr.Checkbox(value=True, label="OBJECTIONS")
                    s_pen = gr.Checkbox(value=False, label="PENDING")
                    s_wdr = gr.Checkbox(value=False, label="WITHDRAWN")
                    s_dis = gr.Checkbox(value=False, label="DISAPPROVED")
                # Column 3: permit-type substring filters (ORed together).
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    gr.Markdown("**Permit type**")
                    p_gc = gr.Checkbox(value=True, label="GC (General Contractor)")
                    p_st = gr.Checkbox(value=True, label="ST (Special Trade)")
                    p_laa = gr.Checkbox(value=False, label="LAA")
                    p_pl = gr.Checkbox(value=False, label="PL")
                    p_el = gr.Checkbox(value=False, label="EL")
                    p_ot = gr.Checkbox(value=False, label="OT")
                # Column 4: search, sort, page size, column picker, export.
                with gr.Column(scale=1, min_width=300, elem_classes="controls-col"):
                    search_box = gr.Textbox(label="Search", placeholder="Free-text search across all columns…")
                    with gr.Group():
                        gr.Markdown("Sort by **filing_date**")
                        sort_desc = gr.Radio(label=None, choices=["Desc", "Asc"], value="Desc")
                    page_size = gr.Number(label="Rows / page", value=DEFAULT_PAGE_SIZE, precision=0)
                    cols_acc = gr.Accordion("Columns", open=False)
                    with cols_acc:
                        visible_cols = gr.Dropdown(label="Visible columns", multiselect=True, choices=[], value=[])
                    export_btn = gr.Button("Export CSV", variant="secondary")
            stats_md = gr.Markdown("_Nothing loaded yet_")
            df_out = gr.Dataframe(interactive=False, wrap=False, max_height=520)
            csv_file = gr.File(label="Download CSV", visible=False)
            # Server-side state: full fetch, post-filter frame, current
            # "load more" page index, and the dataset key that produced them.
            df_full_state = gr.State(pd.DataFrame())
            df_filtered_state = gr.State(pd.DataFrame())
            page_index_state = gr.State(0)
            source_key_state = gr.State("leads_unpermitted")

            def _dataset_key_from_label(label: str) -> str:
                """Reverse-map a dropdown label to its DATASETS key."""
                for k, v in DATASETS:
                    if v == label:
                        return k
                return "leads_unpermitted"

            def _collect_boroughs():
                """Boroughs currently ticked; all allowed boroughs when none.

                NOTE(review): this reads Checkbox.value, which in Gradio is
                the construction-time default rather than live UI state —
                confirm un-ticking a borough actually affects a reload.
                """
                keep = []
                if b_mn.value: keep.append("MANHATTAN")
                if b_bk.value: keep.append("BROOKLYN")
                if b_qn.value: keep.append("QUEENS")
                return keep or list(ALLOWED_BOROUGHS)

            def _initial_load(ds_label, rows_per_page, order):
                """Fetch the selected dataset, apply borough/status/permit
                filters, sort, and restore per-dataset column preferences.

                Returns the 8-tuple wired to [df_out, df_full_state,
                df_filtered_state, page_index_state, stats_md, visible_cols,
                source_key_state, csv_file].
                """
                key = _dataset_key_from_label(ds_label)
                df, secs = _fetch_dataset(key, DEFAULT_DAYS_WINDOW)
                allowed_now = _collect_boroughs()
                if "borough" in df.columns:
                    df = df[df["borough"].isin(allowed_now)].copy()
                # Filing status: keep rows whose status contains ANY checked
                # term (case-insensitive). Same Checkbox.value caveat as in
                # _collect_boroughs applies to every s_*/p_* read below.
                status_terms = []
                if s_app.value: status_terms.append("APPROVED")
                if s_obj.value: status_terms.append("OBJECTION")
                if s_pen.value: status_terms.append("PENDING")
                if s_wdr.value: status_terms.append("WITHDRAW")
                if s_dis.value: status_terms.append("DISAPPROVED")
                if status_terms and "filing_status" in df.columns:
                    pat = "|".join(status_terms)
                    df = df[df["filing_status"].astype(str).str.contains(pat, case=False, na=False)]
                # Permit type: same OR-of-substrings scheme.
                permit_terms = []
                if p_gc.value: permit_terms.append("GC")
                if p_st.value: permit_terms.append("ST")
                if p_laa.value: permit_terms.append("LAA")
                if p_pl.value: permit_terms.append("PL")
                if p_el.value: permit_terms.append("EL")
                if p_ot.value: permit_terms.append("OT")
                if permit_terms and "permit_type" in df.columns:
                    patp = "|".join(permit_terms)
                    df = df[df["permit_type"].astype(str).str.contains(patp, case=False, na=False)]
                asc = (order == "Asc")
                if "filing_date" in df.columns:
                    # mergesort = stable: preserves API order on date ties.
                    df = df.sort_values("filing_date", ascending=asc, kind="mergesort")
                cols_sorted = sorted(df.columns)
                prefs = _load_prefs()
                saved = prefs.get(key, None)
                visible = _sanitize_visible(saved or DEFAULT_VISIBLE_COLUMNS, cols_sorted)
                view = df[visible].head(int(rows_per_page))
                speed_indicator = "⚡" if secs < 5 else ("✅" if secs < 15 else "✓")
                stats = f"{speed_indicator} **{ds_label}** — Loaded **{len(df):,}** rows in {secs:.1f}s"
                if secs < 1: stats += " (cached)"
                return (
                    view,
                    df,
                    df,
                    0,
                    stats,
                    gr.update(choices=cols_sorted, value=visible),
                    key,
                    gr.update(visible=False, value=None),  # hide any stale CSV link
                )
            reload_btn.click(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )
            # Auto-populate this tab when the page first opens.
            demo.load(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )

            def _apply_filters(df_full, rows_per_page, search, order, visibles, ds_key):
                """Re-run search/sort on the cached full frame and persist
                the chosen visible columns for this dataset. Resets paging."""
                df = df_full.copy()
                df = _do_search(df, search)
                asc = (order == "Asc")
                if "filing_date" in df.columns:
                    df = df.sort_values("filing_date", ascending=asc, kind="mergesort")
                prefs = _load_prefs()
                prefs[ds_key] = visibles
                _save_prefs(prefs)
                vis = _sanitize_visible(visibles, list(df.columns))
                view = df[vis].head(int(rows_per_page))
                return view, df, 0
            apply_btn = gr.Button("Apply filter")
            apply_btn.click(
                _apply_filters,
                inputs=[df_full_state, page_size, search_box, sort_desc, visible_cols, source_key_state],
                outputs=[df_out, df_filtered_state, page_index_state],
            )

            def _more(df_filt, page_idx, rows_per_page, visibles):
                """Grow the visible window by one page; always re-slices
                from row 0 so previously shown rows stay on screen."""
                if df_filt is None or df_filt.empty:
                    return pd.DataFrame(), page_idx
                vis = _sanitize_visible(visibles, list(df_filt.columns))
                new_page = page_idx + 1
                start = 0
                end = int(rows_per_page) * (new_page + 1)
                return df_filt[vis].iloc[start:end], new_page
            load_more = gr.Button("Load more rows")
            load_more.click(
                _more,
                inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
                outputs=[df_out, page_index_state],
            )
            # "Reset filters" simply re-runs the initial load.
            reset_btn.click(
                _initial_load,
                inputs=[ds, page_size, sort_desc],
                outputs=[df_out, df_full_state, df_filtered_state, page_index_state, stats_md, visible_cols, source_key_state, csv_file],
            )

            def _export(df_current: pd.DataFrame):
                """Write the filtered frame to a timestamped CSV and reveal
                the download link; keeps the link hidden when empty.

                NOTE(review): the hard-coded /tmp path is POSIX-only —
                consider tempfile for portability.
                """
                if df_current is None or df_current.empty:
                    return gr.update(visible=False, value=None)
                bio = export_csv(df_current)
                bio.seek(0)
                ts = int(time.time())
                path = f"/tmp/buildscout_export_{ts}.csv"
                with open(path, "wb") as f:
                    f.write(bio.read())
                return gr.update(visible=True, value=path, label="Download CSV")
            export_btn.click(
                _export,
                inputs=[df_filtered_state],
                outputs=[csv_file],
            )
        # ============================== STALLED & DISTRESSED PROJECTS TAB ==============================
        with gr.Tab("Stalled & Distressed Projects"):
            gr.Markdown(
                "### Real-time scanner for stalled, frozen, or quietly dead construction sites\n"
                "Perfect for finding partial foundations, fenced holes, or projects you can restart or buy out."
            )
            with gr.Row():
                # Column 1: borough filter + reload/reset.
                with gr.Column(scale=1, min_width=280, elem_classes="controls-col"):
                    with gr.Group():
                        gr.Markdown("**Borough**")
                        st_mn = gr.Checkbox(value=True, label="MANHATTAN", interactive=True)
                        st_bk = gr.Checkbox(value=True, label="BROOKLYN", interactive=True)
                        st_qn = gr.Checkbox(value=True, label="QUEENS", interactive=True)
                    with gr.Row():
                        stalled_reload_btn = gr.Button("Reload", variant="primary")
                        stalled_reset_btn = gr.Button("Reset filters")
                # Column 2: search, staleness sort, page size, apply/export.
                with gr.Column(scale=1, min_width=260, elem_classes="controls-col"):
                    stalled_search = gr.Textbox(label="Search", placeholder="Free-text search across all columns…")
                    with gr.Group():
                        gr.Markdown("**Sort by days stalled**")
                        stalled_sort = gr.Radio(label=None, choices=["Desc (oldest first)", "Asc (newest first)"], value="Desc (oldest first)")
                    stalled_page_size = gr.Number(label="Rows / page", value=DEFAULT_PAGE_SIZE, precision=0)
                    stalled_apply_btn = gr.Button("Apply filter")
                    stalled_export_btn = gr.Button("Export CSV", variant="secondary")
                # Column 3: column picker.
                with gr.Column(scale=1, min_width=300, elem_classes="controls-col"):
                    stalled_cols_acc = gr.Accordion("Columns", open=False)
                    with stalled_cols_acc:
                        stalled_visible_cols = gr.Dropdown(label="Visible columns", multiselect=True, choices=[], value=[])
            stalled_status = gr.Markdown("Click Reload to load stalled projects data")
            stalled_table = gr.Dataframe(interactive=False, wrap=False, max_height=620)
            stalled_csv_file = gr.File(label="Download CSV", visible=False)
            stalled_more_btn = gr.Button("Load more rows")
            # State - stalled_full_state holds ALL data (18 months), stalled_filtered_state holds after search/filter
            stalled_full_state = gr.State(pd.DataFrame())
            stalled_filtered_state = gr.State(pd.DataFrame())
            stalled_page_state = gr.State(0)
            # Default columns for stalled data (based on actual API schema)
            STALLED_DEFAULT_COLS = [
                "full_address", "days_stalled", "borough",
                "bin", "house_number", "street_name", "community_board",
                "complaint_number", "complaint_date", "date_complaint_received",
                "dobrundate",
            ]

            def _load_stalled(rows_per_page, mn, bk, qn, sort_order):
                """Fetch the stalled-sites dataset, filter to the ticked
                boroughs, sort by staleness, and return the first page.

                NOTE(review): days=0 presumably tells the client to skip its
                trailing-window filter (the dataset is pre-filtered to 18
                months per the comments below) — confirm in SocrataClient.
                """
                t0 = time.time()
                df, _ = _client.fetch_dataset_last_n_days("stalled_official", days=0)
                secs = time.time() - t0
                if df.empty:
                    # 7-tuple mirrors the outputs list of the bindings below.
                    return (
                        pd.DataFrame(),
                        pd.DataFrame(),
                        pd.DataFrame(),
                        0,
                        "⚠️ No data returned",
                        gr.update(choices=[], value=[]),
                        gr.update(visible=False, value=None),
                    )
                # Borough filter (API already filtered to last 18 months)
                boroughs = []
                if mn: boroughs.append("MANHATTAN")
                if bk: boroughs.append("BROOKLYN")
                if qn: boroughs.append("QUEENS")
                if not boroughs:
                    boroughs = ["MANHATTAN", "BROOKLYN", "QUEENS"]
                if "borough" in df.columns:
                    df = df[df["borough"].isin(boroughs)].copy()
                # Sort by staleness (Desc = oldest/most stalled first = highest days, Asc = newest first = lowest days)
                ascending = "Asc" in sort_order
                if "days_stalled" in df.columns:
                    df = df.sort_values("days_stalled", ascending=ascending)
                # Column selection
                cols_sorted = sorted(df.columns)
                visible = [c for c in STALLED_DEFAULT_COLS if c in cols_sorted]
                if not visible:
                    visible = cols_sorted[:10]
                view = df[visible].head(int(rows_per_page))
                speed = "⚡" if secs < 2 else "✅"
                stats = f"{speed} **DOB Stalled Construction Sites** – Found **{len(df):,}** projects in {secs:.2f}s"
                return (
                    view,
                    df,  # full state - all 18 months of data
                    df,  # filtered state - same initially
                    0,
                    stats,
                    gr.update(choices=cols_sorted, value=visible),
                    gr.update(visible=False, value=None),
                )

            def _apply_stalled_filters(df_full, rows_per_page, search, visibles, mn, bk, qn, sort_order):
                """Re-filter/search/sort the cached full frame; resets paging."""
                if df_full is None or df_full.empty:
                    return pd.DataFrame(), pd.DataFrame(), 0
                df = df_full.copy()
                # Borough filter (applied to full dataset)
                boroughs = []
                if mn: boroughs.append("MANHATTAN")
                if bk: boroughs.append("BROOKLYN")
                if qn: boroughs.append("QUEENS")
                if not boroughs:
                    boroughs = ["MANHATTAN", "BROOKLYN", "QUEENS"]
                if "borough" in df.columns:
                    df = df[df["borough"].isin(boroughs)].copy()
                # Search (applied to full dataset)
                df = _do_search(df, search)
                # Sort by staleness
                ascending = "Asc" in sort_order
                if "days_stalled" in df.columns:
                    df = df.sort_values("days_stalled", ascending=ascending)
                vis = _sanitize_visible(visibles, list(df.columns))
                view = df[vis].head(int(rows_per_page))
                return view, df, 0

            def _stalled_more(df_filt, page_idx, rows_per_page, visibles):
                """Extend the visible window by one page (re-slices from 0)."""
                if df_filt is None or df_filt.empty:
                    return pd.DataFrame(), page_idx
                vis = _sanitize_visible(visibles, list(df_filt.columns))
                new_page = page_idx + 1
                end = int(rows_per_page) * (new_page + 1)
                return df_filt[vis].iloc[:end], new_page

            def _stalled_export(df_current: pd.DataFrame):
                """CSV export of the filtered stalled frame; same /tmp
                portability caveat as the other tab's export."""
                if df_current is None or df_current.empty:
                    return gr.update(visible=False, value=None)
                bio = export_csv(df_current)
                bio.seek(0)
                ts = int(time.time())
                path = f"/tmp/stalled_export_{ts}.csv"
                with open(path, "wb") as f:
                    f.write(bio.read())
                return gr.update(visible=True, value=path, label="Download CSV")
            # Event bindings
            stalled_reload_btn.click(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            stalled_reset_btn.click(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            # Auto-load the stalled data on page open as well.
            demo.load(
                _load_stalled,
                inputs=[stalled_page_size, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_full_state, stalled_filtered_state, stalled_page_state, stalled_status, stalled_visible_cols, stalled_csv_file],
            )
            stalled_apply_btn.click(
                _apply_stalled_filters,
                inputs=[stalled_full_state, stalled_page_size, stalled_search, stalled_visible_cols, st_mn, st_bk, st_qn, stalled_sort],
                outputs=[stalled_table, stalled_filtered_state, stalled_page_state],
            )
            stalled_more_btn.click(
                _stalled_more,
                inputs=[stalled_filtered_state, stalled_page_state, stalled_page_size, stalled_visible_cols],
                outputs=[stalled_table, stalled_page_state],
            )
            stalled_export_btn.click(
                _stalled_export,
                inputs=[stalled_filtered_state],
                outputs=[stalled_csv_file],
            )
        gr.Markdown(
            f"*{APP_NAME} {APP_VERSION}* · Loads last **{DEFAULT_DAYS_WINDOW}** days. "
            "Set **SOCRATA_APP_TOKEN** for higher API limits. Data is cached for performance."
        )
    return demo