Chest Pain committed on
Commit 0b5326d · 1 parent: 18c3dae

Add initial BuildScout source code

Files changed (6)
  1. app.py +5 -0
  2. config.py +49 -0
  3. gui.py +378 -0
  4. services/data.py +231 -0
  5. singleapp.py +339 -0
  6. utils.py +34 -0
app.py ADDED
@@ -0,0 +1,5 @@
+ from gui import create_app
+
+ if __name__ == "__main__":
+     demo = create_app()
+     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
config.py ADDED
@@ -0,0 +1,49 @@
+ # config.py
+ import os
+
+ # --- API credentials (env) ---
+ SOCRATA_APP_TOKEN = os.getenv("SOCRATA_APP_TOKEN", "").strip()
+
+ # --- Defaults for the UI ---
+ DEFAULT_API_LIMIT = int(os.getenv("DEFAULT_API_LIMIT", "5000"))
+ DEFAULT_PAGE_SIZE = int(os.getenv("DEFAULT_PAGE_SIZE", "200"))
+
+ # Initial visible columns (shown if they exist in the dataset)
+ DEFAULT_VISIBLE_COLUMNS = [
+     "filing_date",
+     "borough",
+     "full_address",
+     "street_name",
+     "house_no",
+     "block",
+     "lot",
+     "job_filing_number",
+     "job_type",
+     "filing_status",
+     "job_status",
+     "job_status_descrp",
+     "job_description",
+ ]
+
+ # Datasets exposed in the UI selector.
+ # Keys are internal IDs the service layer understands; labels show in the UI.
+ DEFAULT_DATASETS = [
+     ("job_filings", "DOB NOW – Job Filings (w9ak-ipjd)"),
+     ("legacy_jobs", "Legacy Job Applications (ic3t-wcy2)"),
+     ("permit_issuance", "Permit Issuance (rbx6-tga4)"),
+ ]
+
+ # For convenience, a canonical borough ordering & mapping (used in services)
+ BOROUGH_MAP = {
+     "MN": "MANHATTAN",
+     "BX": "BRONX",
+     "BK": "BROOKLYN",
+     "QN": "QUEENS",
+     "SI": "STATEN ISLAND",
+     "MANHATTAN": "MANHATTAN",
+     "BRONX": "BRONX",
+     "BROOKLYN": "BROOKLYN",
+     "QUEENS": "QUEENS",
+     "STATEN ISLAND": "STATEN ISLAND",
+ }
+ BOROUGH_ORDER = ["MANHATTAN", "BRONX", "BROOKLYN", "QUEENS", "STATEN ISLAND"]
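
Every value in config.py can be overridden via environment variables before launch. A minimal sketch of how the borough tables are meant to be consumed (normalize_borough below is hypothetical; services/data.py implements the same idea as _norm_borough):

    # Hypothetical usage sketch, not part of the commit.
    from config import BOROUGH_MAP, BOROUGH_ORDER

    def normalize_borough(raw: str) -> str:
        # Map an abbreviation ("BK") or a full name to the canonical form.
        key = raw.strip().upper()
        return BOROUGH_MAP.get(key, key)

    assert normalize_borough("bk") == "BROOKLYN"
    assert normalize_borough("Staten Island") == "STATEN ISLAND"
    # BOROUGH_ORDER fixes the canonical display/sort order.
    assert BOROUGH_ORDER.index("MANHATTAN") == 0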
gui.py ADDED
@@ -0,0 +1,378 @@
+ # gui.py
+ import os
+ import io
+ import uuid
+ import pandas as pd
+ import gradio as gr
+
+ from config import (
+     DEFAULT_DATASETS,
+     DEFAULT_API_LIMIT,
+     DEFAULT_VISIBLE_COLUMNS,
+     BOROUGH_ORDER,
+ )
+ from services.data import SocrataClient
+
+ # ---------------- globals ----------------
+ APP_NAME = "BuildScout"
+ APP_VERSION = "1.8-beta"  # bump to 1.81, 1.82, ... until v2.0 release
+ HEADER_TITLE = f"{APP_NAME} v{APP_VERSION}"
+ HEADER_SUB = "NYC DOB sales-leads explorer (DOB NOW filings joined against BIS permit issuance)"
+
+
+ # ---------------- helpers ----------------
+
+ LEADS_KEY = "leads_unpermitted"
+ LEADS_LABEL = "Sales Leads — Job Filings without Issued Permit"
+
+ def _dataset_label_key_maps():
+     """
+     Build label<->key maps for the dropdown.
+     Adds a virtual 'leads' source on top of DEFAULT_DATASETS.
+     """
+     key_to_label = {k: v for k, v in DEFAULT_DATASETS}
+     label_to_key = {v: k for k, v in DEFAULT_DATASETS}
+
+     # Inject virtual "leads" at the front
+     labels = [LEADS_LABEL] + [v for _, v in DEFAULT_DATASETS]
+     label_to_key[LEADS_LABEL] = LEADS_KEY
+     key_to_label[LEADS_KEY] = LEADS_LABEL
+     return labels, label_to_key, key_to_label
+
+
+ def _sanitize_visible(visible: list[str], all_cols: list[str]) -> list[str]:
+     s = set(all_cols)
+     cleaned = [c for c in (visible or []) if c in s]
+     if cleaned:
+         return cleaned
+     default = [c for c in DEFAULT_VISIBLE_COLUMNS if c in s]
+     return default or all_cols[: min(10, len(all_cols))]
+
+
+ def _contains_any_column(df: pd.DataFrame, query: str) -> pd.Series:
+     if not query:
+         return pd.Series([True] * len(df), index=df.index)
+     q = str(query).strip().lower()
+     if q == "":
+         return pd.Series([True] * len(df), index=df.index)
+     return df.astype(str).apply(lambda row: any(q in str(v).lower() for v in row), axis=1)
+
+
+ def _slice_up_to_page(df: pd.DataFrame, page_index: int, page_size: int) -> pd.DataFrame:
+     # Cumulative slice: all rows up to and including the requested page,
+     # so "Load more" grows the view instead of replacing it.
+     end = (page_index + 1) * page_size
+     return df.iloc[:end].copy()
+
+
+ def _order_columns_for_display(df: pd.DataFrame) -> pd.DataFrame:
+     vis = [c for c in DEFAULT_VISIBLE_COLUMNS if c in df.columns]
+     rest = [c for c in df.columns if c not in vis]
+     return df[vis + rest] if vis else df
+
+
+ def _apply_borough_order(df: pd.DataFrame) -> pd.DataFrame:
+     if "borough" in df.columns:
+         try:
+             cat = pd.CategoricalDtype(categories=BOROUGH_ORDER, ordered=True)
+             df["borough"] = df["borough"].astype(cat)
+         except Exception:
+             pass
+     return df
+
+
+ def _sort_by_date(df: pd.DataFrame, ascending: bool) -> pd.DataFrame:
+     if "filing_date" not in df.columns:
+         return df
+     try:
+         tmp = df.copy()
+         if not pd.api.types.is_datetime64_any_dtype(tmp["filing_date"]):
+             tmp["_dt"] = pd.to_datetime(tmp["filing_date"], errors="coerce", utc=False)
+         else:
+             tmp["_dt"] = tmp["filing_date"]
+         tmp = tmp.sort_values("_dt", ascending=ascending, na_position="last").drop(columns=["_dt"])
+         return tmp
+     except Exception:
+         return df
+
+
+ # ---------------- data layer wrapper ----------------
+
+ _client = SocrataClient()
+
+ def fetch_dataset(dataset_key: str, limit: int) -> tuple[pd.DataFrame, float]:
+     """
+     Wrapper for all sources, including the virtual 'leads' source.
+     """
+     if dataset_key == LEADS_KEY:
+         df, secs = _client.fetch_leads_unpermitted(limit_filings=limit, limit_permits=limit)
+     else:
+         df, secs = _client.fetch_permits(dataset_key=dataset_key, limit=limit)
+
+     if df.empty:
+         return df, secs
+     df = _apply_borough_order(df)
+     df = _order_columns_for_display(df)
+     return df, secs
+
+
+ # ---------------- UI app ----------------
+
+ def create_app():
+     labels, label_to_key, _ = _dataset_label_key_maps()
+
+     empty_df = pd.DataFrame(columns=DEFAULT_VISIBLE_COLUMNS)
+
+     with gr.Blocks(fill_height=True, title=HEADER_TITLE) as demo:
+         gr.Markdown(f"# {HEADER_TITLE}\n{HEADER_SUB}")
+
+         # --- Top controls
+         with gr.Row():
+             dataset_dd = gr.Dropdown(
+                 label="Dataset",
+                 choices=labels,
+                 value=LEADS_LABEL if labels else None,  # default to Sales Leads
+                 allow_custom_value=False,
+                 info="Choose a dataset to load."
+             )
+             reload_btn = gr.Button("Reload", variant="primary")
+             reset_btn = gr.Button("Reset filters")
+             export_btn = gr.Button("Export CSV")
+
+         with gr.Row():
+             max_rows = gr.Number(
+                 label="API max rows",
+                 value=int(DEFAULT_API_LIMIT),
+                 precision=0,
+                 info="Maximum rows to request from the API (token may cap it)."
+             )
+             page_size = gr.Number(
+                 label="Rows / page",
+                 value=200,
+                 precision=0
+             )
+             search_term = gr.Textbox(
+                 label="Search",
+                 placeholder="Free-text search across all columns…"
+             )
+
+         with gr.Row():
+             sort_order = gr.Radio(
+                 label="Sort by filing_date",
+                 choices=["Desc", "Asc"],
+                 value="Desc",
+                 info="Descending is newest-first."
+             )
+
+         with gr.Accordion("Columns", open=False):
+             visible_cols = gr.Dropdown(
+                 label="Visible columns",
+                 multiselect=True,
+                 choices=[],  # set after first load
+                 value=[],    # set after first load
+                 allow_custom_value=False,
+             )
+
+         status_md = gr.Markdown("_Nothing loaded yet_")
+
+         with gr.Group():
+             df_out = gr.Dataframe(
+                 value=empty_df,
+                 type="pandas",
+                 row_count=(0, "dynamic"),
+                 col_count=(len(DEFAULT_VISIBLE_COLUMNS), "dynamic"),
+                 interactive=False,
+                 wrap=False,
+                 label="Results",
+             )
+
+         load_more_btn = gr.Button("Load more rows")
+         csv_file = gr.File(label="Download CSV", visible=False)
+
+         # ----- states -----
+         df_full_state = gr.State(pd.DataFrame())
+         df_filtered_state = gr.State(pd.DataFrame())
+         df_view_state = gr.State(pd.DataFrame())
+         page_index_state = gr.State(0)
+         current_label_state = gr.State("")
+         current_key_state = gr.State("")
+
+         # -------- init / reload ----------
+         def _init_load(label, max_rows_val, page_sz, order):
+             if not label:
+                 return (
+                     empty_df, pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), 0, "", "",
+                     "_Select a dataset_", gr.update(choices=[], value=[]), empty_df
+                 )
+
+             dataset_key = label_to_key.get(label)
+             if not dataset_key:
+                 return (
+                     empty_df, pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), 0, "", "",
+                     f"_Unknown dataset selection: {label}_",
+                     gr.update(choices=[], value=[]), empty_df
+                 )
+
+             try:
+                 limit = int(max_rows_val) if max_rows_val is not None else int(DEFAULT_API_LIMIT)
+             except Exception:
+                 limit = int(DEFAULT_API_LIMIT)
+
+             df, secs = fetch_dataset(dataset_key, limit)
+             if df.empty:
+                 return (
+                     empty_df, empty_df, empty_df, empty_df, 0, label, dataset_key,
+                     f"🗂️ **{label}** — 0 rows returned in {secs:.2f}s.",
+                     gr.update(choices=[], value=[]),
+                     empty_df
+                 )
+
+             asc = (order == "Asc")
+             df_sorted = _sort_by_date(df, ascending=asc)
+
+             cols_sorted = sorted(df_sorted.columns)
+             visible = _sanitize_visible(DEFAULT_VISIBLE_COLUMNS, cols_sorted)
+
+             view = _slice_up_to_page(df_sorted[visible], 0, int(page_sz))
+             stats = f"✅ **{label}** — loaded **{len(df_sorted):,}** rows in **{secs:.2f}s**."
+
+             return (
+                 view,
+                 df_sorted,
+                 df_sorted,
+                 view,
+                 0,
+                 label,
+                 dataset_key,
+                 stats,
+                 gr.update(choices=cols_sorted, value=visible),
+                 view
+             )
+
+         reload_btn.click(
+             fn=_init_load,
+             inputs=[dataset_dd, max_rows, page_size, sort_order],
+             outputs=[
+                 df_out, df_full_state, df_filtered_state, df_view_state,
+                 page_index_state, current_label_state, current_key_state,
+                 status_md, visible_cols, df_view_state
+             ]
+         )
+
+         # Auto-load default (Sales Leads) on start
+         demo.load(
+             _init_load,
+             inputs=[dataset_dd, max_rows, page_size, sort_order],
+             outputs=[
+                 df_out, df_full_state, df_filtered_state, df_view_state,
+                 page_index_state, current_label_state, current_key_state,
+                 status_md, visible_cols, df_view_state
+             ]
+         )
+
+         # -------- apply filter ----------
+         def _apply_filter(query, df_full, page_sz, visible, order):
+             if df_full is None or df_full.empty:
+                 return empty_df, empty_df, 0, "_Nothing to filter_", empty_df, gr.update()
+
+             cols_sorted = sorted(df_full.columns)
+             visible = _sanitize_visible(visible, cols_sorted)
+
+             mask = _contains_any_column(df_full, query)
+             df_filt = df_full.loc[mask].copy()
+
+             asc = (order == "Asc")
+             df_filt = _sort_by_date(df_filt, ascending=asc)
+
+             view = _slice_up_to_page(df_filt[visible], 0, int(page_sz))
+             stats = f"Filtered: **{len(df_filt):,}** rows match"
+             return view, df_filt, 0, stats, view, gr.update(choices=cols_sorted, value=visible)
+
+         apply_btn = gr.Button("Apply filter")
+         apply_btn.click(
+             fn=_apply_filter,
+             inputs=[search_term, df_full_state, page_size, visible_cols, sort_order],
+             outputs=[df_out, df_filtered_state, page_index_state, status_md, df_view_state, visible_cols]
+         )
+
+         # -------- reset ----------
+         def _reset(df_full, page_sz, visible, order, label):
+             if df_full is None or df_full.empty:
+                 return empty_df, empty_df, 0, "_Nothing loaded yet_", empty_df, gr.update()
+
+             cols_sorted = sorted(df_full.columns)
+             visible = _sanitize_visible(visible, cols_sorted)
+
+             asc = (order == "Asc")
+             df_sorted = _sort_by_date(df_full, ascending=asc)
+
+             view = _slice_up_to_page(df_sorted[visible], 0, int(page_sz))
+             stats = f"{label} — Reset: **{len(df_sorted):,}** rows"
+             return view, df_sorted, 0, stats, view, gr.update(choices=cols_sorted, value=visible)
+
+         reset_btn.click(
+             fn=_reset,
+             inputs=[df_full_state, page_size, visible_cols, sort_order, current_label_state],
+             outputs=[df_out, df_filtered_state, page_index_state, status_md, df_view_state, visible_cols]
+         )
+
+         # -------- sort order change ----------
+         def _resort(df_filt, page_sz, visible, order):
+             if df_filt is None or df_filt.empty:
+                 return empty_df, empty_df, 0, empty_df
+             cols_sorted = sorted(df_filt.columns)
+             visible = _sanitize_visible(visible, cols_sorted)
+             asc = (order == "Asc")
+             df_sorted = _sort_by_date(df_filt, ascending=asc)
+             view = _slice_up_to_page(df_sorted[visible], 0, int(page_sz))
+             return view, df_sorted, 0, view
+
+         sort_order.change(
+             fn=_resort,
+             inputs=[df_filtered_state, page_size, visible_cols, sort_order],
+             outputs=[df_out, df_filtered_state, page_index_state, df_view_state]
+         )
+
+         # -------- visible columns change ----------
+         def _change_columns(df_filt, page_idx, page_sz, visible):
+             if df_filt is None or df_filt.empty:
+                 return empty_df, empty_df
+             cols_sorted = sorted(df_filt.columns)
+             visible = _sanitize_visible(visible, cols_sorted)
+             view = _slice_up_to_page(df_filt[visible], int(page_idx), int(page_sz))
+             return view, view
+
+         visible_cols.change(
+             fn=_change_columns,
+             inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
+             outputs=[df_out, df_view_state]
+         )
+
+         # -------- load more ----------
+         def _load_more(df_filt, page_idx, page_sz, visible):
+             if df_filt is None or df_filt.empty:
+                 return empty_df, 0, empty_df
+             cols_sorted = sorted(df_filt.columns)
+             visible = _sanitize_visible(visible, cols_sorted)
+             new_page = int(page_idx) + 1
+             view = _slice_up_to_page(df_filt[visible], new_page, int(page_sz))
+             return view, new_page, view
+
+         load_more_btn.click(
+             fn=_load_more,
+             inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
+             outputs=[df_out, page_index_state, df_view_state]
+         )
+
+         # -------- export (File control) ----------
+         def _export(df_view):
+             if df_view is None or df_view.empty:
+                 return gr.update(value=None, visible=False)
+             path = os.path.join("/tmp", f"nyc_dob_{uuid.uuid4().hex}.csv")
+             df_view.to_csv(path, index=False)
+             return gr.update(value=path, visible=True)
+
+         export_btn.click(_export, inputs=[df_view_state], outputs=[csv_file])
+
+         gr.Markdown(f"*{APP_NAME} {APP_VERSION}* · Tip: use the columns selector to display or hide more columns.")
+
+     return demo
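
The paging model in gui.py is cumulative: _slice_up_to_page returns everything up to and including the requested page, so each "Load more rows" click grows the visible slice instead of swapping pages. A self-contained sketch of that behavior on toy data (the local helper mirrors _slice_up_to_page):

    import pandas as pd

    def slice_up_to_page(df: pd.DataFrame, page_index: int, page_size: int) -> pd.DataFrame:
        # Cumulative: rows 0 .. (page_index + 1) * page_size
        return df.iloc[: (page_index + 1) * page_size].copy()

    df = pd.DataFrame({"n": range(10)})
    assert len(slice_up_to_page(df, 0, 3)) == 3    # first page
    assert len(slice_up_to_page(df, 1, 3)) == 6    # first two pages together
    assert len(slice_up_to_page(df, 9, 3)) == 10   # iloc clamps at the end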
services/data.py ADDED
@@ -0,0 +1,231 @@
+ # services/data.py
+ import os
+ import time
+ import requests
+ import pandas as pd
+ from typing import Tuple, Dict, Any, List
+
+ from config import SOCRATA_APP_TOKEN, BOROUGH_MAP
+
+ # Socrata endpoints (NYC Open Data)
+ DATASET_URLS = {
+     "job_filings": "https://data.cityofnewyork.us/resource/w9ak-ipjd.json",  # DOB NOW: Job Filings
+     "legacy_jobs": "https://data.cityofnewyork.us/resource/ic3t-wcy2.json",  # Legacy BIS Jobs
+     "permit_issuance": "https://data.cityofnewyork.us/resource/rbx6-tga4.json",  # Permit Issuance (BIS)
+ }
+
+ # Per dataset: how to read core fields
+ DATASET_FIELD_MAP: Dict[str, Dict[str, str]] = {
+     # DOB NOW: Job Filings
+     "job_filings": {
+         "filing_date": "filing_date",
+         "house_no": "house_no",
+         "street_name": "street_name",
+         "borough": "borough",
+         "zip": "zip",
+         "desc": "job_description",
+         "job_id": "job_filing_number",
+         "job_status": "filing_status",
+     },
+     # Legacy BIS Jobs
+     "legacy_jobs": {
+         "filing_date": "pre__filing_date",
+         "house_no": "house__",
+         "street_name": "street_name",
+         "borough": "borough",
+         "zip": "zip",
+         "desc": "job_description",
+         "job_id": "job__",
+         "job_status": "job_status",
+     },
+     # Permit Issuance (BIS)
+     "permit_issuance": {
+         "filing_date": "approved_date",
+         "house_no": "house__",
+         "street_name": "street_name",
+         "borough": "borough",
+         "zip": "zip_code",
+         "desc": "job_description",
+         "job_id": "job__",  # BIS job number (e.g., 123456789)
+         "job_status": "filing_status",
+     },
+ }
+
+
+ def _headers() -> Dict[str, str]:
+     h = {}
+     if SOCRATA_APP_TOKEN:
+         h["X-App-Token"] = SOCRATA_APP_TOKEN
+     return h
+
+
+ def _request(url: str, params: Dict[str, Any]) -> List[Dict[str, Any]]:
+     r = requests.get(url, headers=_headers(), params=params, timeout=60)
+     if r.status_code != 200:
+         raise RuntimeError(f"API request failed: {r.status_code} {r.text}")
+     return r.json()
+
+
+ def _to_datetime(series: pd.Series) -> pd.Series:
+     try:
+         return pd.to_datetime(series, errors="coerce", utc=False)
+     except Exception:
+         # Keep the index aligned with the input so assignment stays safe
+         return pd.Series(pd.NaT, index=series.index)
+
+
+ def _norm_borough(raw: pd.Series) -> pd.Series:
+     if raw is None:
+         return pd.Series([], dtype="object")
+     return raw.astype(str).str.strip().str.upper().map(lambda x: BOROUGH_MAP.get(x, x))
+
+
+ def _build_full_address(df: pd.DataFrame,
+                         house_col: str,
+                         street_col: str,
+                         borough_col: str,
+                         zip_col: str | None) -> pd.Series:
+     def join_addr(row):
+         parts = []
+         h = str(row.get(house_col, "") or "").strip()
+         s = str(row.get(street_col, "") or "").strip()
+         b = str(row.get(borough_col, "") or "").strip()
+         z = str(row.get(zip_col, "") or "").strip() if zip_col else ""
+         if h: parts.append(h)
+         if s: parts.append(s)
+         if b: parts.append(b)
+         if z: parts.append(z)
+         return ", ".join(p for p in parts if p)
+
+     return df.apply(join_addr, axis=1)
+
+
+ def _job_base_from_filing(job_filing_number: Any) -> str:
+     """
+     Normalize a DOB NOW job_filing_number like 'M0123-1234' → 'M0123'.
+     If not a string, returns ''.
+     """
+     if not isinstance(job_filing_number, str):
+         return ""
+     return job_filing_number.split("-", 1)[0].strip().upper()
+
+
+ def _job_base_from_permit(job_num: Any) -> str:
+     """
+     Normalize a BIS permit job number (often numeric) to an uppercase string,
+     used only for comparison.
+     """
+     if job_num is None:
+         return ""
+     return str(job_num).strip().upper()
+
+
+ class SocrataClient:
+     def __init__(self):
+         if not SOCRATA_APP_TOKEN:
+             print("⚠️ Warning: SOCRATA_APP_TOKEN not set. You may be limited to 1,000 rows per call.")
+
+     def fetch_permits(self, dataset_key: str, limit: int = 5000) -> Tuple[pd.DataFrame, float]:
+         """
+         Fetch from one dataset key and normalize key columns.
+         Returns (df, seconds).
+         """
+         if dataset_key not in DATASET_URLS:
+             raise ValueError(f"Unknown dataset key: {dataset_key}")
+
+         url = DATASET_URLS[dataset_key]
+         fmap = DATASET_FIELD_MAP.get(dataset_key, {})
+         order_col = fmap.get("filing_date", ":id")
+
+         params = {
+             "$limit": int(limit),
+             "$order": f"{order_col} DESC" if order_col != ":id" else ":id",
+         }
+
+         t0 = time.time()
+         rows = _request(url, params)
+         secs = time.time() - t0
+
+         if not rows:
+             return pd.DataFrame(), secs
+
+         df = pd.DataFrame(rows)
+
+         # --- filing_date ---
+         filing_col = fmap.get("filing_date")
+         if filing_col and filing_col in df.columns:
+             df["filing_date"] = _to_datetime(df[filing_col])
+         else:
+             df["filing_date"] = pd.NaT
+
+         # --- borough ---
+         boro_col = fmap.get("borough")
+         if boro_col and boro_col in df.columns:
+             df["borough"] = _norm_borough(df[boro_col])
+         else:
+             df["borough"] = None
+
+         # --- Full address ---
+         house_col = fmap.get("house_no")
+         street_col = fmap.get("street_name")
+         zip_col = fmap.get("zip")
+         for c in [house_col, street_col, zip_col]:
+             if c and c not in df.columns:
+                 df[c] = ""
+         df["full_address"] = _build_full_address(df, house_col or "", street_col or "", "borough", zip_col)
+
+         # --- Job status (light harmonization) ---
+         job_status_src = fmap.get("job_status")
+         if job_status_src and job_status_src in df.columns:
+             df["job_status"] = df[job_status_src]
+
+         # Keep dataset key
+         df["_dataset"] = dataset_key
+
+         # Natural sort newest first if we have filing_date
+         try:
+             if "filing_date" in df.columns:
+                 df = df.sort_values("filing_date", ascending=False, kind="mergesort")
+         except Exception:
+             pass
+
+         return df, secs
+
+     # ---------- Sales Leads (anti-join): filings without permits ----------
+     def fetch_leads_unpermitted(self, limit_filings: int = 5000, limit_permits: int = 5000) -> Tuple[pd.DataFrame, float]:
+         """
+         Build a 'sales leads' view:
+         - Fetch DOB NOW Job Filings (w9ak-ipjd)
+         - Fetch BIS Permit Issuance (rbx6-tga4)
+         - Keep only filings whose 'base' job id has NO issued permit.
+         Returns (leads_df, seconds).
+         """
+         t0 = time.time()
+
+         filings_df, _ = self.fetch_permits("job_filings", limit_filings)
+         permits_df, _ = self.fetch_permits("permit_issuance", limit_permits)
+
+         if filings_df.empty:
+             return pd.DataFrame(), time.time() - t0
+
+         # Build a comparable "base" for both sides
+         if "job_filing_number" in filings_df.columns:
+             filings_bases = filings_df["job_filing_number"].map(_job_base_from_filing)
+         else:
+             # fallback: try any 'job_id' style col (from map) or empty
+             filings_bases = filings_df.get("job_id", pd.Series([""] * len(filings_df))).map(_job_base_from_filing)
+
+         permitted_bases: set[str] = set()
+         if not permits_df.empty:
+             # BIS permits often have 'job__' numeric
+             src = "job__" if "job__" in permits_df.columns else ("job_id" if "job_id" in permits_df.columns else None)
+             if src:
+                 permitted_bases = set(permits_df[src].map(_job_base_from_permit).dropna().astype(str))
+
+         mask = ~filings_bases.isin(permitted_bases)
+         leads = filings_df.loc[mask].copy()
+
+         # Helpful flag for UI
+         leads["has_permit_already"] = False
+
+         secs = time.time() - t0
+         return leads, secs
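
The leads anti-join turns on the two normalizers above: a DOB NOW job_filing_number such as "M0123-1234" reduces to its base "M0123", and any base that also appears among issued permits is dropped. A self-contained sketch of that logic on toy data (no network calls; the inline helper mirrors _job_base_from_filing):

    import pandas as pd

    def job_base_from_filing(n):
        # "M0123-1234" -> "M0123"; non-strings -> ""
        return n.split("-", 1)[0].strip().upper() if isinstance(n, str) else ""

    filings = pd.DataFrame({"job_filing_number": ["M0123-1234", "B0456-I1", "Q0789-P1"]})
    permits = pd.DataFrame({"job__": ["B0456"]})  # pretend B0456 already has a permit

    bases = filings["job_filing_number"].map(job_base_from_filing)
    permitted = set(permits["job__"].astype(str).str.upper())
    leads = filings[~bases.isin(permitted)]  # anti-join: keep only unpermitted filings
    assert list(leads["job_filing_number"]) == ["M0123-1234", "Q0789-P1"]

Note that real BIS job__ values are usually numeric, so how often the two ID schemes actually overlap depends on the data.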
singleapp.py ADDED
@@ -0,0 +1,339 @@
+ import os
+ import time
+ from typing import List, Tuple, Optional
+
+ import pandas as pd
+ import requests
+ import gradio as gr
+
+ # -------------------------
+ # Config
+ # -------------------------
+ # NYC Open Data (Socrata) dataset — you can swap later as needed.
+ # Using the "w9ak-ipjd" permits dataset you tested most recently.
+ SOCRATA_URL = "https://data.cityofnewyork.us/resource/w9ak-ipjd.json"
+ # Read your app token from the environment (recommended) or leave empty
+ SOCRATA_APP_TOKEN = os.getenv("SOCRATA_APP_TOKEN", "").strip()
+
+ DEFAULT_API_LIMIT = 3000  # how many rows to fetch from the API initially
+ DEFAULT_PAGE_SIZE = 300   # how many rows to show per “page”
+ DEFAULT_ORDER = "filing_date DESC"  # server-side sort (if the column exists)
+ DATE_COLUMNS_GUESS = [
+     "filing_date",
+     "latest_action_date",
+     "pre__filing_date",
+     "approved",
+     "signoff_date",
+ ]
+
+ # Friendly default “important” columns to show (use only if present)
+ DEFAULT_VISIBLE_COLUMNS = [
+     "job__", "doc__", "borough", "house__", "street_name",
+     "filing_date", "job_description"
+ ]
+
+
+ # -------------------------
+ # Data access
+ # -------------------------
+ def fetch_permits(limit: int = DEFAULT_API_LIMIT,
+                   order: str = DEFAULT_ORDER) -> Tuple[pd.DataFrame, float]:
+     """
+     Fetch up to `limit` rows from the Socrata API, optionally ordering.
+     Returns (DataFrame, seconds_elapsed).
+     """
+     headers = {}
+     if SOCRATA_APP_TOKEN:
+         headers["X-App-Token"] = SOCRATA_APP_TOKEN
+
+     params = {"$limit": limit}
+     if order:
+         params["$order"] = order
+
+     t0 = time.time()
+     r = requests.get(SOCRATA_URL, headers=headers, params=params, timeout=60)
+     r.raise_for_status()
+     data = r.json()
+     elapsed = time.time() - t0
+
+     if not data:
+         return pd.DataFrame(), elapsed
+
+     df = pd.DataFrame(data)
+
+     # Parse date-like columns (if present)
+     for col in DATE_COLUMNS_GUESS:
+         if col in df.columns:
+             # Try to parse; errors='coerce' leaves invalid values as NaT
+             df[col] = pd.to_datetime(df[col], errors="coerce")
+
+     # Secondary local sort (just in case server-side order was ignored)
+     if "filing_date" in df.columns:
+         df = df.sort_values("filing_date", ascending=False, na_position="last").reset_index(drop=True)
+
+     return df, elapsed
+
+
+ # -------------------------
+ # Helpers
+ # -------------------------
+ def pick_existing_columns(all_cols: List[str],
+                           desired: List[str]) -> List[str]:
+     """Return only the desired columns that actually exist in df."""
+     s = set(all_cols)
+     return [c for c in desired if c in s]
+
+
+ def slice_up_to_page(df: pd.DataFrame, page_index: int, page_size: int) -> pd.DataFrame:
+     """Return cumulative slice (first N pages)."""
+     end = (page_index + 1) * page_size
+     return df.iloc[:end].copy()
+
+
+ def contains_any_column(df: pd.DataFrame, term: str) -> pd.Series:
+     """Case-insensitive literal 'contains' across object columns."""
+     if term == "":
+         return pd.Series([True] * len(df), index=df.index)
+
+     text_cols = [c for c in df.columns if df[c].dtype == object]
+     if not text_cols:
+         # fallback: stringify the entire df (rare)
+         return df.astype(str).apply(
+             lambda row: term.lower() in row.to_string().lower(), axis=1
+         )
+     mask = pd.Series(False, index=df.index)
+     for c in text_cols:
+         mask = mask | df[c].astype(str).str.contains(term, case=False, na=False, regex=False)
+     return mask
+
+
+ # -------------------------
+ # Gradio app logic
+ # -------------------------
+ def init_load(max_rows: int,
+               page_size: int) -> tuple:
+     """Reload from API. Reset state & UI."""
+     try:
+         df, seconds = fetch_permits(limit=max_rows, order=DEFAULT_ORDER)
+     except Exception as e:
+         return (gr.update(value=pd.DataFrame()),
+                 gr.update(value=pd.DataFrame()),
+                 gr.update(value=pd.DataFrame()),
+                 0,
+                 gr.update(value=f"_Error while loading data:_ `{e}`"),
+                 gr.update(choices=[], value=None),
+                 gr.update(choices=[], value=[]))
+
+     if df.empty:
+         return (gr.update(value=pd.DataFrame()),
+                 df, df, 0,
+                 f"_Loaded 0 records in {seconds:.1f} seconds._",
+                 gr.update(choices=[], value=None),
+                 gr.update(choices=[], value=[]))
+
+     # Visible column defaults (only those that exist)
+     visible = pick_existing_columns(df.columns.tolist(), DEFAULT_VISIBLE_COLUMNS)
+     if not visible:  # fallback: first ~10 columns
+         visible = df.columns.tolist()[:10]
+
+     # Build dropdown choices (filterable fields)
+     filterable_choices = sorted(df.columns.tolist())
+
+     # First page slice for the view
+     view = slice_up_to_page(df[visible], page_index=0, page_size=page_size)
+
+     stats = f"Loaded **{len(df):,}** records in **{seconds:.1f}** seconds."
+     return (gr.update(value=view),
+             df, df, 0,
+             stats,
+             gr.update(choices=filterable_choices, value="borough" if "borough" in filterable_choices else filterable_choices[0]),
+             gr.update(choices=visible, value=visible))
+
+
+ def apply_filter(search_term: str,
+                  field: Optional[str],
+                  df_full: pd.DataFrame,
+                  page_size: int,
+                  visible_cols: List[str]) -> tuple:
+     """
+     Filter df_full by search_term in the selected field (or across all text fields
+     if field is None/empty). Reset to page 0, update table & stats.
+     """
+     if df_full is None or df_full.empty:
+         return (gr.update(value=pd.DataFrame()), df_full, 0, "_No data loaded yet._")
+
+     t0 = time.time()
+     if search_term is None:
+         search_term = ""
+     search_term = search_term.strip()
+
+     if field and field in df_full.columns and search_term != "":
+         # Single-field filter (convert to str for robustness; regex=False = literal match)
+         mask = df_full[field].astype(str).str.contains(search_term, case=False, na=False, regex=False)
+     else:
+         # All-columns filter (object columns)
+         mask = contains_any_column(df_full, search_term)
+
+     df_filtered = df_full.loc[mask].copy()
+
+     # Respect ‘visible_cols’ if provided
+     use_cols = [c for c in visible_cols if c in df_filtered.columns] if visible_cols else df_filtered.columns.tolist()
+
+     # Sort by filing_date desc (if it exists)
+     if "filing_date" in df_filtered.columns:
+         df_filtered = df_filtered.sort_values("filing_date", ascending=False, na_position="last")
+
+     view = slice_up_to_page(df_filtered[use_cols], page_index=0, page_size=page_size)
+
+     seconds = time.time() - t0
+     stats = f"Filtered to **{len(df_filtered):,}** records in **{seconds:.1f}** seconds."
+     return gr.update(value=view), df_filtered, 0, stats
+
+
+ def load_more(df_filtered: pd.DataFrame,
+               page_index: int,
+               page_size: int,
+               visible_cols: List[str]) -> tuple:
+     """Increase the page and return cumulative rows."""
+     if df_filtered is None or df_filtered.empty:
+         return gr.update(value=pd.DataFrame()), page_index
+
+     new_page = page_index + 1
+     use_cols = [c for c in visible_cols if c in df_filtered.columns] if visible_cols else df_filtered.columns.tolist()
+     view = slice_up_to_page(df_filtered[use_cols], page_index=new_page, page_size=page_size)
+     return gr.update(value=view), new_page
+
+
+ def reset_filters(df_full: pd.DataFrame,
+                   page_size: int,
+                   visible_cols: List[str]) -> tuple:
+     """Clear search, reset the dropdown, show the first page of df_full."""
+     if df_full is None or df_full.empty:
+         return (gr.update(value=pd.DataFrame()), df_full, 0, "_No data loaded yet._")
+
+     use_cols = [c for c in visible_cols if c in df_full.columns] if visible_cols else df_full.columns.tolist()
+     view = slice_up_to_page(df_full[use_cols], page_index=0, page_size=page_size)
+     stats = f"Showing **{len(df_full):,}** records."
+     return gr.update(value=view), df_full, 0, stats
+
+
+ def export_csv(df_view: pd.DataFrame) -> str:
+     """Export the current (visible) view to CSV; return the file path (assumes /mnt/data is writable)."""
+     if df_view is None or df_view.empty:
+         # create an empty file anyway to keep UX consistent
+         path = "/mnt/data/buildscout_empty.csv"
+         pd.DataFrame().to_csv(path, index=False)
+         return path
+     path = "/mnt/data/buildscout_view.csv"
+     df_view.to_csv(path, index=False)
+     return path
+
+
+ # -------------------------
+ # UI
+ # -------------------------
+ with gr.Blocks(theme=gr.themes.Soft(), title="BuildScout v1.0") as demo:
+     gr.Markdown("# BuildScout v1.0 \nNYC DOB Permits (Gradio Edition)")
+
+     with gr.Row():
+         reload_btn = gr.Button("Reload DOB data", variant="secondary")
+         reset_btn = gr.Button("Reset filters", variant="secondary")
+         export_btn = gr.Button("Export current view (CSV)", variant="secondary")
+         max_rows = gr.Number(label="API Max Rows", value=DEFAULT_API_LIMIT, precision=0)
+         page_size = gr.Number(label="Rows per page", value=DEFAULT_PAGE_SIZE, precision=0)
+
+     with gr.Row():
+         search_term = gr.Textbox(label="Search term", placeholder="Type to search…")
+         field_dropdown = gr.Dropdown(label="Filter field", choices=[], value=None)
+         visible_cols = gr.CheckboxGroup(label="Visible columns (for display)", choices=[], value=[])
+
+     stats_md = gr.Markdown("_Load something!_")
+
+     df_out = gr.Dataframe(headers=[], row_count=10, col_count=(0, "dynamic"),
+                           interactive=False, wrap=False, height=600)
+
+     load_more_btn = gr.Button("Load more rows")
+
+     # STATE
+     df_full_state = gr.State(pd.DataFrame())      # Full dataset loaded from API
+     df_filtered_state = gr.State(pd.DataFrame())  # Filtered subset (for display)
+     page_index_state = gr.State(0)                # current page (0-based)
+     df_view_state = gr.State(pd.DataFrame())      # Last view slice (for export)
+
+     # --- WIRES ---
+     # Reload data
+     reload_btn.click(
+         fn=init_load,
+         inputs=[max_rows, page_size],
+         outputs=[
+             df_out,             # visible table
+             df_full_state,      # full df
+             df_filtered_state,  # filtered df (starts = full)
+             page_index_state,
+             stats_md,
+             field_dropdown,
+             visible_cols
+         ]
+     )
+
+     # Apply filter
+     # Note: also updates df_view_state for export
+     def _apply_and_store(search, field, df_full, page_size_val, visible):
+         view, df_filt, page0, stats = apply_filter(search, field, df_full, int(page_size_val), visible)
+         # Store the current view in a state for export
+         # (gr.update(...) returns a dict; pull its "value" back out)
+         if isinstance(view, dict) and "value" in view:
+             df_view = view["value"]
+         else:
+             df_view = pd.DataFrame()
+         return view, df_filt, page0, stats, df_view
+
+     apply_btn = gr.Button("Apply filter", variant="primary")
+     apply_btn.click(
+         fn=_apply_and_store,
+         inputs=[search_term, field_dropdown, df_full_state, page_size, visible_cols],
+         outputs=[df_out, df_filtered_state, page_index_state, stats_md, df_view_state]
+     )
+
+     # Reset filters
+     def _reset_and_store(df_full, page_size_val, visible):
+         view, df_filt, page0, stats = reset_filters(df_full, int(page_size_val), visible)
+         if isinstance(view, dict) and "value" in view:
+             df_view = view["value"]
+         else:
+             df_view = pd.DataFrame()
+         return view, df_filt, page0, stats, df_view, gr.update(value=""), gr.update(value=None)
+     reset_btn.click(
+         fn=_reset_and_store,
+         inputs=[df_full_state, page_size, visible_cols],
+         outputs=[df_out, df_filtered_state, page_index_state, stats_md, df_view_state, search_term, field_dropdown]
+     )
+
+     # Load more
+     def _more_and_store(df_filt, page_idx, page_size_val, visible):
+         view, new_page = load_more(df_filt, int(page_idx), int(page_size_val), visible)
+         if isinstance(view, dict) and "value" in view:
+             df_view = view["value"]
+         else:
+             df_view = pd.DataFrame()
+         return view, new_page, df_view
+
+     load_more_btn.click(
+         fn=_more_and_store,
+         inputs=[df_filtered_state, page_index_state, page_size, visible_cols],
+         outputs=[df_out, page_index_state, df_view_state]
+     )
+
+     # Export
+     csv_file = gr.File(label="Download CSV", interactive=False)
+     export_btn.click(
+         fn=export_csv,
+         inputs=[df_view_state],
+         outputs=[csv_file]
+     )
+
+     gr.Markdown("— **BuildScout v1.0** —")
+
+ if __name__ == "__main__":
+     # Use 0.0.0.0 for WSL so VSCode/Browser can hit it via the forwarded port.
+     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
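
One subtlety in the filter path: pandas str.contains treats its argument as a regular expression by default, so raw user input like "(" or "C++" would raise. The filters above therefore pass regex=False to match literally; a short sketch of the difference:

    import pandas as pd

    s = pd.Series(["A (PLUMBING)", "B GENERAL"])
    # Literal match: safe for arbitrary user input.
    assert s.str.contains("(plumbing)", case=False, na=False, regex=False).tolist() == [True, False]
    # The regex default would raise re.error on an unbalanced pattern like "(".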
utils.py ADDED
@@ -0,0 +1,34 @@
+ # utils.py
+ import io
+ import pandas as pd
+
+ def pick_existing_columns(all_cols: list[str], desired: list[str]) -> list[str]:
+     seen = {c.lower(): c for c in all_cols}
+     out = []
+     for d in desired:
+         key = d.lower()
+         if key in seen:
+             out.append(seen[key])
+     return out or list(all_cols)  # fall back to everything if none matched
+
+ def slice_up_to_page(df: pd.DataFrame, page: int, page_size: int) -> pd.DataFrame:
+     # Cumulative slice: everything up to and including the requested page.
+     end = (page + 1) * page_size
+     return df.iloc[:end].copy()
+
+ def contains_any_column(df: pd.DataFrame, term: str) -> pd.Series:
+     if not term:
+         return pd.Series([True] * len(df), index=df.index)
+     term = str(term).strip()
+     if not term:
+         return pd.Series([True] * len(df), index=df.index)
+
+     # Combine columns as strings (fast-ish); regex=False matches literally
+     joined = df.astype(str).apply(lambda col: col.str.contains(term, case=False, na=False, regex=False))
+     return joined.any(axis=1)
+
+ def export_csv(df: pd.DataFrame) -> tuple[str, bytes]:
+     buf = io.StringIO()
+     df.to_csv(buf, index=False)
+     name = "buildscout_export.csv"
+     return name, buf.getvalue().encode("utf-8")
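
Unlike singleapp.py's export, utils.export_csv returns a (filename, bytes) pair instead of writing to a fixed path, so the caller decides where the file lands. A minimal usage sketch:

    import pandas as pd
    from utils import export_csv

    df = pd.DataFrame({"borough": ["BROOKLYN"], "job__": ["123456789"]})
    name, payload = export_csv(df)
    with open(name, "wb") as f:  # or hand `payload` straight to an HTTP response
        f.write(payload)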