Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,7 +28,15 @@ def predict(text):
|
|
| 28 |
# generate and end generate if <|endoftext|> is not in text
|
| 29 |
gen_tokens = model.generate(
|
| 30 |
tokens, do_sample=True, temperature=0.8, max_new_tokens=64, top_k=50, top_p=0.8,
|
| 31 |
-
no_repeat_ngram_size=3, repetition_penalty=1.2,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
)
|
| 33 |
generated = tokenizer.batch_decode(gen_tokens)[0]
|
| 34 |
return generated
|
|
|
|
| 28 |
# generate and end generate if <|endoftext|> is not in text
|
| 29 |
gen_tokens = model.generate(
|
| 30 |
tokens, do_sample=True, temperature=0.8, max_new_tokens=64, top_k=50, top_p=0.8,
|
| 31 |
+
no_repeat_ngram_size=3, repetition_penalty=1.2,
|
| 32 |
+
bad_words_ids=[
|
| 33 |
+
tokenizer.encode('...'),
|
| 34 |
+
tokenizer.encode('....'),
|
| 35 |
+
tokenizer.encode('(중략)'),
|
| 36 |
+
tokenizer.encode('http')
|
| 37 |
+
],
|
| 38 |
+
eos_token_id=tokenizer.eos_token_id,
|
| 39 |
+
pad_token_id=tokenizer.pad_token_id
|
| 40 |
)
|
| 41 |
generated = tokenizer.batch_decode(gen_tokens)[0]
|
| 42 |
return generated
|