Chest Pain committed · Commit 0b5326d · 1 Parent(s): 18c3dae

Add initial BuildScout source code

Files changed:
- app.py +12 -0
- config.py +49 -0
- gui.py +378 -0
- services/data.py +231 -0
- singleapp.py +339 -0
- utils.py +34 -0
app.py
ADDED
@@ -0,0 +1,12 @@
from gui import create_app

if __name__ == "__main__":
    demo = create_app()
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)


# from gui import create_app

# if __name__ == "__main__":
#     demo = create_app()
#     demo.launch(server_name="0.0.0.0", server_port=7860)
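Reviewer note: the launcher hard-codes 7860, the port Hugging Face Spaces exposes by default. A minimal variant that reads the port from the environment instead — the PORT variable is an assumption for illustration, not something this commit defines:

    import os
    from gui import create_app

    if __name__ == "__main__":
        demo = create_app()
        # PORT is hypothetical here; fall back to the Spaces default when unset.
        demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), show_error=True)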
config.py
ADDED
@@ -0,0 +1,49 @@
# config.py
import os

# --- API credentials (env) ---
SOCRATA_APP_TOKEN = os.getenv("SOCRATA_APP_TOKEN", "").strip()

# --- Defaults for the UI ---
DEFAULT_API_LIMIT = int(os.getenv("DEFAULT_API_LIMIT", "5000"))
DEFAULT_PAGE_SIZE = int(os.getenv("DEFAULT_PAGE_SIZE", "200"))

# Initial visible columns (shown if they exist in the dataset)
DEFAULT_VISIBLE_COLUMNS = [
    "filing_date",
    "borough",
    "full_address",
    "street_name",
    "house_no",
    "block",
    "lot",
    "job_filing_number",
    "job_type",
    "filing_status",
    "job_status",
    "job_status_descrp",
    "job_description",
]

# Datasets exposed in the UI selector.
# Keys are internal IDs the service layer understands; labels show in the UI.
DEFAULT_DATASETS = [
    ("job_filings", "DOB NOW – Job Filings (w9ak-ipjd)"),
    ("legacy_jobs", "Legacy Job Applications (ic3t-wcy2)"),
    ("permit_issuance", "Permit Issuance (rbx6-tga4)"),
]

# For convenience, a canonical borough ordering & mapping (used in services)
BOROUGH_MAP = {
    "MN": "MANHATTAN",
    "BX": "BRONX",
    "BK": "BROOKLYN",
    "QN": "QUEENS",
    "SI": "STATEN ISLAND",
    "MANHATTAN": "MANHATTAN",
    "BRONX": "BRONX",
    "BROOKLYN": "BROOKLYN",
    "QUEENS": "QUEENS",
    "STATEN ISLAND": "STATEN ISLAND",
}
BOROUGH_ORDER = ["MANHATTAN", "BRONX", "BROOKLYN", "QUEENS", "STATEN ISLAND"]
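Reviewer note: BOROUGH_MAP deliberately keys both the two-letter codes and the full names, so a single dictionary lookup normalizes either form. A self-contained sketch of that lookup (trimmed map, standalone — not code from this commit):

    # Sketch: normalize raw borough strings the way services/data.py does.
    BOROUGH_MAP = {"MN": "MANHATTAN", "BK": "BROOKLYN", "BROOKLYN": "BROOKLYN"}  # trimmed copy

    def norm(raw: str) -> str:
        key = raw.strip().upper()
        return BOROUGH_MAP.get(key, key)  # unknown values pass through unchanged

    print(norm("bk"))        # BROOKLYN
    print(norm("Brooklyn"))  # BROOKLYN
    print(norm("GOTHAM"))    # GOTHAM (left as-is)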
gui.py
ADDED
@@ -0,0 +1,378 @@
# gui.py
import os
import io
import uuid
import pandas as pd
import gradio as gr

from config import (
    DEFAULT_DATASETS,
    DEFAULT_API_LIMIT,
    DEFAULT_VISIBLE_COLUMNS,
    BOROUGH_ORDER,
)
from services.data import SocrataClient

# -- global --
APP_NAME = "BuildScout"
APP_VERSION = "1.8-beta"  # bump to 1.81, 1.82, ... until v2.0 release
HEADER_TITLE = f"{APP_NAME} v{APP_VERSION}"
HEADER_SUB = "NYC DOB sales-leads explorer (DOB NOW filings joined against BIS permit issuance)"


# ---------------- helpers ----------------

LEADS_KEY = "leads_unpermitted"
LEADS_LABEL = "Sales Leads — Job Filings without Issued Permit"

def _dataset_label_key_maps():
    """
    Build label<->key maps for dropdown.
    Adds a virtual 'leads' source on top of DEFAULT_DATASETS.
    """
    key_to_label = {k: v for k, v in DEFAULT_DATASETS}
    label_to_key = {v: k for k, v in DEFAULT_DATASETS}

    # Inject virtual "leads" at the front
    labels = [LEADS_LABEL] + [v for _, v in DEFAULT_DATASETS]
    label_to_key[LEADS_LABEL] = LEADS_KEY
    key_to_label[LEADS_KEY] = LEADS_LABEL
    return labels, label_to_key, key_to_label


def _sanitize_visible(visible: list[str], all_cols: list[str]) -> list[str]:
    s = set(all_cols)
    cleaned = [c for c in (visible or []) if c in s]
    if cleaned:
        return cleaned
    default = [c for c in DEFAULT_VISIBLE_COLUMNS if c in s]
    return default or all_cols[: min(10, len(all_cols))]


def _contains_any_column(df: pd.DataFrame, query: str) -> pd.Series:
    if not query:
        return pd.Series([True] * len(df), index=df.index)
    q = str(query).strip().lower()
    if q == "":
        return pd.Series([True] * len(df), index=df.index)
    return df.astype(str).apply(lambda row: any(q in str(v).lower() for v in row), axis=1)


def _slice_up_to_page(df: pd.DataFrame, page_index: int, page_size: int) -> pd.DataFrame:
    start = page_index * page_size
    end = start + page_size
    return df.iloc[start:end].copy()


def _order_columns_for_display(df: pd.DataFrame) -> pd.DataFrame:
    vis = [c for c in DEFAULT_VISIBLE_COLUMNS if c in df.columns]
    rest = [c for c in df.columns if c not in vis]
    return df[vis + rest] if vis else df


def _apply_borough_order(df: pd.DataFrame) -> pd.DataFrame:
    if "borough" in df.columns:
        try:
            cat = pd.CategoricalDtype(categories=BOROUGH_ORDER, ordered=True)
            df["borough"] = df["borough"].astype(cat)
        except Exception:
            pass
    return df


def _sort_by_date(df: pd.DataFrame, ascending: bool) -> pd.DataFrame:
    if "filing_date" not in df.columns:
        return df
    try:
        tmp = df.copy()
        if not pd.api.types.is_datetime64_any_dtype(tmp["filing_date"]):
            tmp["_dt"] = pd.to_datetime(tmp["filing_date"], errors="coerce", utc=False)
        else:
            tmp["_dt"] = tmp["filing_date"]
        tmp = tmp.sort_values("_dt", ascending=ascending, na_position="last").drop(columns=["_dt"])
        return tmp
    except Exception:
        return df


# ---------------- data layer wrapper ----------------

_client = SocrataClient()

def fetch_dataset(dataset_key: str, limit: int) -> tuple[pd.DataFrame, float]:
    """
    Wrapper for all sources, including the virtual 'leads' source.
    """
    if dataset_key == LEADS_KEY:
        df, secs = _client.fetch_leads_unpermitted(limit_filings=limit, limit_permits=limit)
    else:
        df, secs = _client.fetch_permits(dataset_key=dataset_key, limit=limit)

    if df.empty:
        return df, secs
    df = _apply_borough_order(df)
    df = _order_columns_for_display(df)
    return df, secs


# ---------------- UI app ----------------

def create_app():
    labels, label_to_key, _ = _dataset_label_key_maps()

    empty_df = pd.DataFrame(columns=DEFAULT_VISIBLE_COLUMNS)

    with gr.Blocks(fill_height=True, title=HEADER_TITLE) as demo:
        gr.Markdown(f"# {HEADER_TITLE}\n{HEADER_SUB}")

        # --- Top controls
        with gr.Row():
            dataset_dd = gr.Dropdown(
                label="Dataset",
                choices=labels,
                value=LEADS_LABEL if labels else None,  # default to Sales Leads
                allow_custom_value=False,
                info="Choose a dataset to load."
            )
            reload_btn = gr.Button("Reload", variant="primary")
            reset_btn = gr.Button("Reset filters")
            export_btn = gr.Button("Export CSV")

        with gr.Row():
            max_rows = gr.Number(
                label="API max rows",
                value=int(DEFAULT_API_LIMIT),
                precision=0,
                info="Maximum rows to request from the API (token may cap it)."
            )
            page_size = gr.Number(
                label="Rows / page",
                value=200,
                precision=0
            )
            search_term = gr.Textbox(
                label="Search",
                placeholder="Free-text search across all columns…"
            )

        with gr.Row():
            sort_order = gr.Radio(
                label="Sort by filing_date",
                choices=["Desc", "Asc"],
                value="Desc",
                info="Descending is newest-first."
            )

        with gr.Accordion("Columns", open=False):
            visible_cols = gr.Dropdown(
                label="Visible columns",
                multiselect=True,
                choices=[],  # set after first load
                value=[],    # set after first load
                allow_custom_value=False,
            )

        status_md = gr.Markdown("_Nothing loaded yet_")

        with gr.Group():
            df_out = gr.Dataframe(
                value=empty_df,
                type="pandas",
                row_count=(0, "dynamic"),
                col_count=(len(DEFAULT_VISIBLE_COLUMNS), "dynamic"),
                interactive=False,
                wrap=False,
                label="Results",
            )

        load_more_btn = gr.Button("Load more rows")
        csv_file = gr.File(label="Download CSV", visible=False)

        # ----- states -----
        df_full_state = gr.State(pd.DataFrame())
        df_filtered_state = gr.State(pd.DataFrame())
        df_view_state = gr.State(pd.DataFrame())
        page_index_state = gr.State(0)
        current_label_state = gr.State("")
        current_key_state = gr.State("")

        # -------- init / reload ----------
        def _init_load(label, max_rows_val, page_sz, order):
            if not label:
                return (
                    empty_df, pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), 0, "", "",
                    "_Select a dataset_", gr.update(choices=[], value=[]), empty_df
                )

            dataset_key = label_to_key.get(label)
            if not dataset_key:
                return (
                    empty_df, pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), 0, "", "",
                    f"_Unknown dataset selection: {label}_",
                    gr.update(choices=[], value=[]), empty_df
                )

            try:
                limit = int(max_rows_val) if max_rows_val is not None else int(DEFAULT_API_LIMIT)
            except Exception:
                limit = int(DEFAULT_API_LIMIT)

            df, secs = fetch_dataset(dataset_key, limit)
            if df.empty:
                return (
                    empty_df, empty_df, empty_df, empty_df, 0, label, dataset_key,
                    f"🗂️ **{label}** — 0 rows returned in {secs:.2f}s.",
                    gr.update(choices=[], value=[]),
                    empty_df
                )

            asc = (order == "Asc")
            df_sorted = _sort_by_date(df, ascending=asc)

            cols_sorted = sorted(df_sorted.columns)
            visible = _sanitize_visible(DEFAULT_VISIBLE_COLUMNS, cols_sorted)

            view = _slice_up_to_page(df_sorted[visible], 0, int(page_sz))
            stats = f"✅ **{label}** — loaded **{len(df_sorted):,}** rows in **{secs:.2f}s**."

            return (
                view,
                df_sorted,
                df_sorted,
                view,
                0,
                label,
                dataset_key,
                stats,
                gr.update(choices=cols_sorted, value=visible),
                view
            )

        reload_btn.click(
            fn=_init_load,
            inputs=[dataset_dd, max_rows, page_size, sort_order],
            outputs=[
                df_out, df_full_state, df_filtered_state, df_view_state,
                page_index_state, current_label_state, current_key_state,
                status_md, visible_cols, df_view_state
            ]
        )

        # Auto-load default (Sales Leads) on start
        demo.load(
            _init_load,
            inputs=[dataset_dd, max_rows, page_size, sort_order],
            outputs=[
                df_out, df_full_state, df_filtered_state, df_view_state,
                page_index_state, current_label_state, current_key_state,
                status_md, visible_cols, df_view_state
            ]
        )

        # -------- apply filter ----------
        def _apply_filter(query, df_full, page_sz, visible, order):
            if df_full is None or df_full.empty:
                return empty_df, empty_df, 0, "_Nothing to filter_", empty_df, gr.update()

            cols_sorted = sorted(df_full.columns)
            visible = _sanitize_visible(visible, cols_sorted)

            mask = _contains_any_column(df_full, query)
            df_filt = df_full.loc[mask].copy()

            asc = (order == "Asc")
            df_filt = _sort_by_date(df_filt, ascending=asc)

            view = _slice_up_to_page(df_filt[visible], 0, int(page_sz))
            stats = f"Filtered: **{len(df_filt):,}** rows match"
            return view, df_filt, 0, stats, view, gr.update(choices=cols_sorted, value=visible)

        apply_btn = gr.Button("Apply filter")
        apply_btn.click(
            fn=_apply_filter,
            inputs=[search_term, df_full_state, page_size, visible_cols, sort_order],
            outputs=[df_out, df_filtered_state, page_index_state, status_md, df_view_state, visible_cols]
        )

        # -------- reset ----------
        def _reset(df_full, page_sz, visible, order, label):
            if df_full is None or df_full.empty:
                return empty_df, empty_df, 0, "_Nothing loaded yet_", empty_df, gr.update()

            cols_sorted = sorted(df_full.columns)
            visible = _sanitize_visible(visible, cols_sorted)

            asc = (order == "Asc")
            df_sorted = _sort_by_date(df_full, ascending=asc)

            view = _slice_up_to_page(df_sorted[visible], 0, int(page_sz))
            stats = f"{label} — Reset: **{len(df_sorted):,}** rows"
            return view, df_sorted, 0, stats, view, gr.update(choices=cols_sorted, value=visible)

        reset_btn.click(
            fn=_reset,
            inputs=[df_full_state, page_size, visible_cols, sort_order, current_label_state],
            outputs=[df_out, df_filtered_state, page_index_state, status_md, df_view_state, visible_cols]
        )

        # -------- sort order change ----------
        def _resort(df_filt, page_sz, visible, order):
            if df_filt is None or df_filt.empty:
                return empty_df, empty_df, 0, empty_df
            cols_sorted = sorted(df_filt.columns)
            visible = _sanitize_visible(visible, cols_sorted)
            asc = (order == "Asc")
            df_sorted = _sort_by_date(df_filt, ascending=asc)
            view = _slice_up_to_page(df_sorted[visible], 0, int(page_sz))
            return view, df_sorted, 0, view

        sort_order.change(
            fn=_resort,
            inputs=[df_filtered_state, page_size, visible_cols, sort_order],
            outputs=[df_out, df_filtered_state, page_index_state, df_view_state]
        )

        # -------- visible columns change ----------
        def _change_columns(df_filt, page_idx, page_sz, visible):
            if df_filt is None or df_filt.empty:
                return empty_df, empty_df
            cols_sorted = sorted(df_filt.columns)
            visible = _sanitize_visible(visible, cols_sorted)
            view = _slice_up_to_page(df_filt[visible], int(page_idx), int(page_sz))
            return view, view

        visible_cols.change(
            fn=_change_columns,
            inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
            outputs=[df_out, df_view_state]
        )

        # -------- load more ----------
        def _load_more(df_filt, page_idx, page_sz, visible):
            if df_filt is None or df_filt.empty:
                return empty_df, 0, empty_df
            cols_sorted = sorted(df_filt.columns)
            visible = _sanitize_visible(visible, cols_sorted)
            new_page = int(page_idx) + 1
            view = _slice_up_to_page(df_filt[visible], new_page, int(page_sz))
            return view, new_page, view

        load_more_btn.click(
            fn=_load_more,
            inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
            outputs=[df_out, page_index_state, df_view_state]
        )

        # -------- export (File control) ----------
        def _export(df_view):
            if df_view is None or df_view.empty:
                return gr.update(value=None, visible=False)
            path = os.path.join("/tmp", f"nyc_dob_{uuid.uuid4().hex}.csv")
            df_view.to_csv(path, index=False)
            return gr.update(value=path, visible=True)

        export_btn.click(_export, inputs=[df_view_state], outputs=[csv_file])

        gr.Markdown(f"*{APP_NAME} {APP_VERSION}* · Tip: use the columns selector to display or hide more columns.")

    return demo
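Reviewer note: _slice_up_to_page here returns only the requested page (iloc[start:end]), while the same-named helpers in singleapp.py and utils.py slice cumulatively (iloc[:end]); with this version, "Load more rows" replaces the visible rows rather than appending. A standalone sketch of the difference:

    import pandas as pd

    df = pd.DataFrame({"n": range(10)})
    page, size = 1, 3

    page_only = df.iloc[page * size:(page + 1) * size]  # gui.py style: rows 3..5
    cumulative = df.iloc[:(page + 1) * size]            # singleapp/utils style: rows 0..5
    print(len(page_only), len(cumulative))              # 3 6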
services/data.py
ADDED
@@ -0,0 +1,231 @@
# services/data.py
import os
import time
import requests
import pandas as pd
from typing import Tuple, Dict, Any, List

from config import SOCRATA_APP_TOKEN, BOROUGH_MAP

# Socrata endpoints (NYC Open Data)
DATASET_URLS = {
    "job_filings": "https://data.cityofnewyork.us/resource/w9ak-ipjd.json",  # DOB NOW: Job Filings
    "legacy_jobs": "https://data.cityofnewyork.us/resource/ic3t-wcy2.json",  # Legacy BIS Jobs
    "permit_issuance": "https://data.cityofnewyork.us/resource/rbx6-tga4.json",  # Permit Issuance (BIS)
}

# Per dataset: how to read core fields
DATASET_FIELD_MAP: Dict[str, Dict[str, str]] = {
    # DOB NOW: Job Filings
    "job_filings": {
        "filing_date": "filing_date",
        "house_no": "house_no",
        "street_name": "street_name",
        "borough": "borough",
        "zip": "zip",
        "desc": "job_description",
        "job_id": "job_filing_number",
        "job_status": "filing_status",
    },
    # Legacy BIS Jobs
    "legacy_jobs": {
        "filing_date": "pre__filing_date",
        "house_no": "house__",
        "street_name": "street_name",
        "borough": "borough",
        "zip": "zip",
        "desc": "job_description",
        "job_id": "job__",
        "job_status": "job_status",
    },
    # Permit Issuance (BIS)
    "permit_issuance": {
        "filing_date": "approved_date",
        "house_no": "house__",
        "street_name": "street_name",
        "borough": "borough",
        "zip": "zip_code",
        "desc": "job_description",
        "job_id": "job__",  # BIS job number (e.g., 123456789)
        "job_status": "filing_status",
    },
}


def _headers() -> Dict[str, str]:
    h = {}
    if SOCRATA_APP_TOKEN:
        h["X-App-Token"] = SOCRATA_APP_TOKEN
    return h


def _request(url: str, params: Dict[str, Any]) -> List[Dict[str, Any]]:
    r = requests.get(url, headers=_headers(), params=params, timeout=60)
    if r.status_code != 200:
        raise RuntimeError(f"API request failed: {r.status_code} {r.text}")
    return r.json()


def _to_datetime(series: pd.Series) -> pd.Series:
    try:
        return pd.to_datetime(series, errors="coerce", utc=False)
    except Exception:
        return pd.to_datetime(pd.Series([], dtype="object"))


def _norm_borough(raw: pd.Series) -> pd.Series:
    if raw is None:
        return pd.Series([], dtype="object")
    return raw.astype(str).str.strip().str.upper().map(lambda x: BOROUGH_MAP.get(x, x))


def _build_full_address(df: pd.DataFrame,
                        house_col: str,
                        street_col: str,
                        borough_col: str,
                        zip_col: str | None) -> pd.Series:
    def join_addr(row):
        parts = []
        h = str(row.get(house_col, "") or "").strip()
        s = str(row.get(street_col, "") or "").strip()
        b = str(row.get(borough_col, "") or "").strip()
        z = str(row.get(zip_col, "") or "").strip() if zip_col else ""
        if h: parts.append(h)
        if s: parts.append(s)
        if b: parts.append(b)
        if z: parts.append(z)
        return ", ".join(p for p in parts if p)

    return df.apply(join_addr, axis=1)


def _job_base_from_filing(job_filing_number: Any) -> str:
    """
    Normalize a DOB NOW job_filing_number like 'M0123-1234' → 'M0123'.
    If not a string, returns ''.
    """
    if not isinstance(job_filing_number, str):
        return ""
    return job_filing_number.split("-", 1)[0].strip().upper()


def _job_base_from_permit(job_num: Any) -> str:
    """
    Normalize a BIS permit job number (often numeric) to uppercase string,
    used only for comparison.
    """
    if job_num is None:
        return ""
    return str(job_num).strip().upper()


class SocrataClient:
    def __init__(self):
        if not SOCRATA_APP_TOKEN:
            print("⚠️ Warning: SOCRATA_APP_TOKEN not set. You may be limited to 1,000 rows per call.")

    def fetch_permits(self, dataset_key: str, limit: int = 5000) -> Tuple[pd.DataFrame, float]:
        """
        Fetch from one dataset key and normalize key columns.
        Returns (df, seconds).
        """
        if dataset_key not in DATASET_URLS:
            raise ValueError(f"Unknown dataset key: {dataset_key}")

        url = DATASET_URLS[dataset_key]
        fmap = DATASET_FIELD_MAP.get(dataset_key, {})
        order_col = fmap.get("filing_date", ":id")

        params = {
            "$limit": int(limit),
            "$order": f"{order_col} DESC" if order_col != ":id" else ":id",
        }

        t0 = time.time()
        rows = _request(url, params)
        secs = time.time() - t0

        if not rows:
            return pd.DataFrame(), secs

        df = pd.DataFrame(rows)

        # --- filing_date ---
        filing_col = fmap.get("filing_date")
        if filing_col and filing_col in df.columns:
            df["filing_date"] = _to_datetime(df[filing_col])
        else:
            df["filing_date"] = pd.NaT

        # --- borough ---
        boro_col = fmap.get("borough")
        if boro_col and boro_col in df.columns:
            df["borough"] = _norm_borough(df[boro_col])
        else:
            df["borough"] = None

        # --- Full address ---
        house_col = fmap.get("house_no")
        street_col = fmap.get("street_name")
        zip_col = fmap.get("zip")
        for c in [house_col, street_col, zip_col]:
            if c and c not in df.columns:
                df[c] = ""
        df["full_address"] = _build_full_address(df, house_col or "", street_col or "", "borough", zip_col)

        # --- Job status (harmonize light) ---
        job_status_src = fmap.get("job_status")
        if job_status_src and job_status_src in df.columns:
            df["job_status"] = df[job_status_src]

        # Keep dataset key
        df["_dataset"] = dataset_key

        # Natural sort newest first if we have filing_date
        try:
            if "filing_date" in df.columns:
                df = df.sort_values("filing_date", ascending=False, kind="mergesort")
        except Exception:
            pass

        return df, secs

    # ---------- Sales Leads (anti-join): filings without permits ----------
    def fetch_leads_unpermitted(self, limit_filings: int = 5000, limit_permits: int = 5000) -> Tuple[pd.DataFrame, float]:
        """
        Build a 'sales leads' view:
          - Fetch DOB NOW Job Filings (w9ak-ipjd)
          - Fetch BIS Permit Issuance (rbx6-tga4)
          - Keep only filings whose 'base' job id has NO issued permit.
        Returns (leads_df, seconds).
        """
        t0 = time.time()

        filings_df, _ = self.fetch_permits("job_filings", limit_filings)
        permits_df, _ = self.fetch_permits("permit_issuance", limit_permits)

        if filings_df.empty:
            return pd.DataFrame(), time.time() - t0

        # Build a comparable "base" for both sides
        if "job_filing_number" in filings_df.columns:
            filings_bases = filings_df["job_filing_number"].map(_job_base_from_filing)
        else:
            # fallback: try any 'job_id' style col (from map) or empty
            filings_bases = filings_df.get("job_id", pd.Series([""] * len(filings_df))).map(_job_base_from_filing)

        permitted_bases: set[str] = set()
        if not permits_df.empty:
            # BIS permits often have 'job__' numeric
            src = "job__" if "job__" in permits_df.columns else ("job_id" if "job_id" in permits_df.columns else None)
            if src:
                permitted_bases = set(permits_df[src].map(_job_base_from_permit).dropna().astype(str))

        mask = ~filings_bases.isin(permitted_bases)
        leads = filings_df.loc[mask].copy()

        # Helpful flag for UI
        leads["has_permit_already"] = False

        secs = time.time() - t0
        return leads, secs
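Reviewer note: the leads anti-join works by reducing both sides to a comparable "base" job id and keeping filings whose base is absent from the permitted set. A toy, self-contained sketch of the same masking logic — the ids below are made up for illustration, not real DOB data:

    import pandas as pd

    filings = pd.DataFrame({"job_filing_number": ["M0123-1234", "M0456-I1", "M0789-1234"]})
    permits = pd.DataFrame({"job__": ["M0456"]})

    # Same normalization as _job_base_from_filing / _job_base_from_permit
    bases = filings["job_filing_number"].map(lambda s: s.split("-", 1)[0].strip().upper())
    permitted = set(permits["job__"].map(lambda s: str(s).strip().upper()))

    leads = filings.loc[~bases.isin(permitted)]
    print(leads["job_filing_number"].tolist())  # ['M0123-1234', 'M0789-1234']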
singleapp.py
ADDED
@@ -0,0 +1,339 @@
import os
import time
from typing import List, Tuple, Optional

import pandas as pd
import requests
import gradio as gr

# -------------------------
# Config
# -------------------------
# NYC Open Data (Socrata) dataset — you can swap later as needed.
# Using the "w9ak-ipjd" permits dataset you tested most recently.
SOCRATA_URL = "https://data.cityofnewyork.us/resource/w9ak-ipjd.json"
# Read your app token from environment (recommended) or leave empty
SOCRATA_APP_TOKEN = os.getenv("SOCRATA_APP_TOKEN", "").strip()

DEFAULT_API_LIMIT = 3000   # how many rows to fetch from API initially
DEFAULT_PAGE_SIZE = 300    # how many rows to show per “page”
DEFAULT_ORDER = "filing_date DESC"  # server-side sort (if the column exists)
DATE_COLUMNS_GUESS = [
    "filing_date",
    "latest_action_date",
    "pre__filing_date",
    "approved",
    "signoff_date",
]

# Friendly default “important” columns to show (use only if present)
DEFAULT_VISIBLE_COLUMNS = [
    "job__", "doc__", "borough", "house__", "street_name",
    "filing_date", "job_description"
]


# -------------------------
# Data access
# -------------------------
def fetch_permits(limit: int = DEFAULT_API_LIMIT,
                  order: str = DEFAULT_ORDER) -> Tuple[pd.DataFrame, float]:
    """
    Fetch up to `limit` rows from the Socrata API, optionally ordering.
    Returns (DataFrame, seconds_elapsed).
    """
    headers = {}
    if SOCRATA_APP_TOKEN:
        headers["X-App-Token"] = SOCRATA_APP_TOKEN

    params = {"$limit": limit}
    if order:
        params["$order"] = order

    t0 = time.time()
    r = requests.get(SOCRATA_URL, headers=headers, params=params, timeout=60)
    r.raise_for_status()
    data = r.json()
    elapsed = time.time() - t0

    if not data:
        return pd.DataFrame(), elapsed

    df = pd.DataFrame(data)

    # Parse date-like columns (if present)
    for col in DATE_COLUMNS_GUESS:
        if col in df.columns:
            # Try to parse; errors='coerce' leaves invalid as NaT
            df[col] = pd.to_datetime(df[col], errors="coerce")

    # Secondary local sort (just in case server-side order was ignored)
    if "filing_date" in df.columns:
        df = df.sort_values("filing_date", ascending=False, na_position="last").reset_index(drop=True)

    return df, elapsed


# -------------------------
# Helpers
# -------------------------
def pick_existing_columns(all_cols: List[str],
                          desired: List[str]) -> List[str]:
    """Return only the desired columns that actually exist in df."""
    s = set(all_cols)
    return [c for c in desired if c in s]


def slice_up_to_page(df: pd.DataFrame, page_index: int, page_size: int) -> pd.DataFrame:
    """Return cumulative slice (first N pages)."""
    end = (page_index + 1) * page_size
    return df.iloc[:end].copy()


def contains_any_column(df: pd.DataFrame, term: str) -> pd.Series:
    """Case-insensitive 'contains' across object columns."""
    if term == "":
        return pd.Series([True] * len(df), index=df.index)

    text_cols = [c for c in df.columns if df[c].dtype == object]
    if not text_cols:
        # fallback: stringify entire df (rare)
        return df.astype(str).apply(
            lambda row: term.lower() in row.to_string().lower(), axis=1
        )
    mask = pd.Series(False, index=df.index)
    for c in text_cols:
        mask = mask | df[c].astype(str).str.contains(term, case=False, na=False)
    return mask


# -------------------------
# Gradio app logic
# -------------------------
def init_load(max_rows: int,
              page_size: int) -> tuple:
    """Reload from API. Reset state & UI."""
    try:
        df, seconds = fetch_permits(limit=max_rows, order=DEFAULT_ORDER)
    except Exception as e:
        return (gr.update(value=pd.DataFrame()),
                gr.update(value=pd.DataFrame()),
                gr.update(value=pd.DataFrame()),
                0,
                gr.update(value=f"_Error while loading data:_ `{e}`"),
                gr.update(choices=[], value=None),
                gr.update(choices=[], value=[]))

    if df.empty:
        return (gr.update(value=pd.DataFrame()),
                df, df, 0,
                f"_Loaded 0 records in {seconds:.1f} seconds._",
                gr.update(choices=[], value=None),
                gr.update(choices=[], value=[]))

    # Visible columns defaults (only those that exist)
    visible = pick_existing_columns(df.columns.tolist(), DEFAULT_VISIBLE_COLUMNS)
    if not visible:  # fallback: first ~10 columns
        visible = df.columns.tolist()[:10]

    # Build dropdown choices (filterable fields)
    filterable_choices = sorted(df.columns.tolist())

    # First page slice for view
    view = slice_up_to_page(df[visible], page_index=0, page_size=page_size)

    stats = f"Loaded **{len(df):,}** records in **{seconds:.1f}** seconds."
    return (gr.update(value=view),
            df, df, 0,
            stats,
            gr.update(choices=filterable_choices, value="borough" if "borough" in filterable_choices else filterable_choices[0]),
            gr.update(choices=visible, value=visible))


def apply_filter(search_term: str,
                 field: Optional[str],
                 df_full: pd.DataFrame,
                 page_size: int,
                 visible_cols: List[str]) -> tuple:
    """
    Filter df_full by search_term in the selected field (or across all text fields
    if field is None/empty). Reset to page 0, update table & stats.
    """
    if df_full is None or df_full.empty:
        return (gr.update(value=pd.DataFrame()), df_full, 0, "_No data loaded yet._")

    t0 = time.time()
    if search_term is None:
        search_term = ""
    search_term = search_term.strip()

    if field and field in df_full.columns and search_term != "":
        # Single field filter (convert to str for robustness)
        mask = df_full[field].astype(str).str.contains(search_term, case=False, na=False)
    else:
        # All columns filter (object columns)
        mask = contains_any_column(df_full, search_term)

    df_filtered = df_full.loc[mask].copy()

    # Respect ‘visible_cols’ if provided
    use_cols = [c for c in visible_cols if c in df_filtered.columns] if visible_cols else df_filtered.columns.tolist()

    # Sort by filing_date desc (if exists)
    if "filing_date" in df_filtered.columns:
        df_filtered = df_filtered.sort_values("filing_date", ascending=False, na_position="last")

    view = slice_up_to_page(df_filtered[use_cols], page_index=0, page_size=page_size)

    seconds = time.time() - t0
    stats = f"Filtered to **{len(df_filtered):,}** records in **{seconds:.1f}** seconds."
    return gr.update(value=view), df_filtered, 0, stats


def load_more(df_filtered: pd.DataFrame,
              page_index: int,
              page_size: int,
              visible_cols: List[str]) -> tuple:
    """Increase page and return cumulative rows."""
    if df_filtered is None or df_filtered.empty:
        return gr.update(value=pd.DataFrame()), page_index

    new_page = page_index + 1
    use_cols = [c for c in visible_cols if c in df_filtered.columns] if visible_cols else df_filtered.columns.tolist()
    view = slice_up_to_page(df_filtered[use_cols], page_index=new_page, page_size=page_size)
    return gr.update(value=view), new_page


def reset_filters(df_full: pd.DataFrame,
                  page_size: int,
                  visible_cols: List[str]) -> tuple:
    """Clear search, reset dropdown, show first page of df_full."""
    if df_full is None or df_full.empty:
        return (gr.update(value=pd.DataFrame()), df_full, 0, "_No data loaded yet._")

    use_cols = [c for c in visible_cols if c in df_full.columns] if visible_cols else df_full.columns.tolist()
    view = slice_up_to_page(df_full[use_cols], page_index=0, page_size=page_size)
    stats = f"Showing **{len(df_full):,}** records."
    return gr.update(value=view), df_full, 0, stats


def export_csv(df_view: pd.DataFrame) -> str:
    """Export current (visible) view to CSV; return file path."""
    if df_view is None or df_view.empty:
        # create an empty file anyway to keep UX consistent
        path = "/mnt/data/buildscout_empty.csv"
        pd.DataFrame().to_csv(path, index=False)
        return path
    path = "/mnt/data/buildscout_view.csv"
    df_view.to_csv(path, index=False)
    return path


# -------------------------
# UI
# -------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="BuildScout v1.0") as demo:
    gr.Markdown("# BuildScout v1.0 \nNYC DOB Permits (Gradio Edition)")

    with gr.Row():
        reload_btn = gr.Button("Reload DOB data", variant="secondary")
        reset_btn = gr.Button("Reset filters", variant="secondary")
        export_btn = gr.Button("Export current view (CSV)", variant="secondary")
        max_rows = gr.Number(label="API Max Rows", value=DEFAULT_API_LIMIT, precision=0)
        page_size = gr.Number(label="Rows per page", value=DEFAULT_PAGE_SIZE, precision=0)

    with gr.Row():
        search_term = gr.Textbox(label="Search term", placeholder="Type to search…")
        field_dropdown = gr.Dropdown(label="Filter field", choices=[], value=None)
        visible_cols = gr.CheckboxGroup(label="Visible columns (for display)", choices=[], value=[])

    stats_md = gr.Markdown("_Load something!_")

    df_out = gr.Dataframe(headers=[], row_count=10, col_count=(0, "dynamic"),
                          interactive=False, wrap=False, height=600)

    load_more_btn = gr.Button("Load more rows")

    # STATE
    df_full_state = gr.State(pd.DataFrame())      # Full dataset loaded from API
    df_filtered_state = gr.State(pd.DataFrame())  # Filtered subset (for display)
    page_index_state = gr.State(0)                # current page (0-based)
    df_view_state = gr.State(pd.DataFrame())      # Last view slice (for export)

    # --- WIRES ---
    # Reload data
    reload_btn.click(
        fn=init_load,
        inputs=[max_rows, page_size],
        outputs=[
            df_out,             # visible table
            df_full_state,      # full df
            df_filtered_state,  # filtered df (starts = full)
            page_index_state,
            stats_md,
            field_dropdown,
            visible_cols
        ]
    )

    # Apply filter
    # Note: also updates df_view_state for export
    def _apply_and_store(search, field, df_full, page_size_val, visible):
        view, df_filt, page0, stats = apply_filter(search, field, df_full, int(page_size_val), visible)
        # Store the current view in a state for export
        # (Gradio Dataframe returns a dict on update; we reconstruct from df_filt slice)
        if isinstance(view, dict) and "value" in view:
            df_view = view["value"]
        else:
            df_view = pd.DataFrame()
        return view, df_filt, page0, stats, df_view

    apply_btn = gr.Button("Apply filter", variant="primary")
    apply_btn.click(
        fn=_apply_and_store,
        inputs=[search_term, field_dropdown, df_full_state, page_size, visible_cols],
        outputs=[df_out, df_filtered_state, page_index_state, stats_md, df_view_state]
    )

    # Reset filters
    def _reset_and_store(df_full, page_size_val, visible):
        view, df_filt, page0, stats = reset_filters(df_full, int(page_size_val), visible)
        if isinstance(view, dict) and "value" in view:
            df_view = view["value"]
        else:
            df_view = pd.DataFrame()
        return view, df_filt, page0, stats, df_view, gr.update(value=""), gr.update(value=None)
    reset_btn.click(
        fn=_reset_and_store,
        inputs=[df_full_state, page_size, visible_cols],
        outputs=[df_out, df_filtered_state, page_index_state, stats_md, df_view_state, search_term, field_dropdown]
    )

    # Load more
    def _more_and_store(df_filt, page_idx, page_size_val, visible):
        view, new_page = load_more(df_filt, int(page_idx), int(page_size_val), visible)
        if isinstance(view, dict) and "value" in view:
            df_view = view["value"]
        else:
            df_view = pd.DataFrame()
        return view, new_page, df_view

    load_more_btn.click(
        fn=_more_and_store,
        inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
        outputs=[df_out, page_index_state, df_view_state]
    )

    # Export
    csv_file = gr.File(label="Download CSV", interactive=False)
    export_btn.click(
        fn=export_csv,
        inputs=[df_view_state],
        outputs=[csv_file]
    )

    gr.Markdown("— **BuildScout v1.0** —")

if __name__ == "__main__":
    # Use 0.0.0.0 for WSL so VSCode/Browser can hit it via forwarded port.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
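Reviewer note: fetch_permits builds a standard SoQL request — $limit caps the row count, $order asks for a server-side sort. A minimal standalone sketch of the call it issues (live endpoint copied from the file; the token header is optional and raises the anonymous rate cap):

    import requests

    url = "https://data.cityofnewyork.us/resource/w9ak-ipjd.json"
    params = {"$limit": 5, "$order": "filing_date DESC"}  # SoQL paging/sort params
    headers = {}  # add {"X-App-Token": "..."} when a token is available

    r = requests.get(url, headers=headers, params=params, timeout=60)
    r.raise_for_status()
    print(len(r.json()))  # up to 5 JSON rows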
utils.py
ADDED
@@ -0,0 +1,34 @@
# utils.py
import io
import pandas as pd

def pick_existing_columns(all_cols: list[str], desired: list[str]) -> list[str]:
    seen = {c.lower(): c for c in all_cols}
    out = []
    for d in desired:
        key = d.lower()
        if key in seen:
            out.append(seen[key])
    return out or list(all_cols)  # fall back to everything if none matched

def slice_up_to_page(df: pd.DataFrame, page: int, page_size: int) -> pd.DataFrame:
    start = page * page_size
    end = start + page_size
    return df.iloc[:end].copy()

def contains_any_column(df: pd.DataFrame, term: str) -> pd.Series:
    if not term:
        return pd.Series([True] * len(df), index=df.index)
    term = str(term).strip()
    if not term:
        return pd.Series([True] * len(df), index=df.index)

    # Combine columns as strings (fast-ish)
    joined = df.astype(str).apply(lambda col: col.str.contains(term, case=False, na=False))
    return joined.any(axis=1)

def export_csv(df: pd.DataFrame) -> tuple[str, bytes]:
    buf = io.StringIO()
    df.to_csv(buf, index=False)
    name = "buildscout_export.csv"
    return name, buf.getvalue().encode("utf-8")
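Reviewer note: unlike the export paths in gui.py and singleapp.py, utils.export_csv returns a (filename, bytes) pair instead of writing to disk, which keeps it side-effect-free and easy to test. A quick usage sketch (assumes utils.py from this commit is importable):

    import pandas as pd
    from utils import export_csv  # assumption: utils.py is on the import path

    name, payload = export_csv(pd.DataFrame({"borough": ["QUEENS"], "block": [123]}))
    with open(name, "wb") as f:  # the caller decides where/whether to persist
        f.write(payload)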