Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,16 +7,15 @@ import streamlit as st
|
|
| 7 |
import torch
|
| 8 |
|
| 9 |
# Load the BillSum dataset
|
| 10 |
-
|
| 11 |
-
|
| 12 |
ds = load_dataset("FiscalNote/billsum")
|
|
|
|
| 13 |
# Initialize models
|
| 14 |
sbert_model = SentenceTransformer("all-mpnet-base-v2")
|
| 15 |
t5_tokenizer = AutoTokenizer.from_pretrained("t5-small")
|
| 16 |
-
t5_model =
|
| 17 |
|
| 18 |
# Prepare data and build FAISS index
|
| 19 |
-
texts =
|
| 20 |
case_embeddings = sbert_model.encode(texts, convert_to_tensor=True, show_progress_bar=True)
|
| 21 |
|
| 22 |
# Convert embeddings to numpy array and handle deprecation warning
|
|
|
|
| 7 |
import torch
|
| 8 |
|
| 9 |
# Load the BillSum dataset
|
|
|
|
|
|
|
| 10 |
ds = load_dataset("FiscalNote/billsum")
|
| 11 |
+
|
| 12 |
# Initialize models
|
| 13 |
sbert_model = SentenceTransformer("all-mpnet-base-v2")
|
| 14 |
t5_tokenizer = AutoTokenizer.from_pretrained("t5-small")
|
| 15 |
+
t5_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
|
| 16 |
|
| 17 |
# Prepare data and build FAISS index
|
| 18 |
+
texts = ds["train"]["text"][:100] # Limiting to 100 samples for speed, and selecting the train split.
|
| 19 |
case_embeddings = sbert_model.encode(texts, convert_to_tensor=True, show_progress_bar=True)
|
| 20 |
|
| 21 |
# Convert embeddings to numpy array and handle deprecation warning
|