| # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| # # Function to Initialize the Model | |
| # def init_model(): | |
| # para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") | |
| # para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") | |
| # return para_tokenizer, para_model | |
| # # Function to Paraphrase the Text | |
| # def paraphrase(question, para_tokenizer, para_model, num_beams=10, num_beam_groups=10, num_return_sequences=10, repetition_penalty=10.0, diversity_penalty=3.0, no_repeat_ngram_size=2, temperature=0.7, max_length=64): | |
| # input_ids = para_tokenizer( | |
| # f'paraphrase: {question}', | |
| # return_tensors="pt", padding="longest", | |
| # max_length=max_length, | |
| # truncation=True, | |
| # ).input_ids | |
| # outputs = para_model.generate( | |
| # input_ids, temperature=temperature, repetition_penalty=repetition_penalty, | |
| # num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size, | |
| # num_beams=num_beams, num_beam_groups=num_beam_groups, | |
| # max_length=max_length, diversity_penalty=diversity_penalty | |
| # ) | |
| # res = para_tokenizer.batch_decode(outputs, skip_special_tokens=True) | |
| # return res | |
| # def generate_paraphrase(question): | |
| # para_tokenizer, para_model = init_model() | |
| # res = paraphrase(question, para_tokenizer, para_model) | |
| # return res | |
| # print(generate_paraphrase("Donald Trump said at a campaign rally event in Wilkes-Barre, Pennsylvania, that there has “never been a more dangerous time 5since the Holocaust” to be Jewish in the United States.")) | |
| ''' | |
| Accepts a sentence or list of sentences and returns a lit of all their paraphrases using GPT-4. | |
| ''' | |
from openai import OpenAI
from dotenv import load_dotenv
# Load variables from a local .env file so OPENAI_API_KEY is visible below.
load_dotenv()
import os
# May be None if OPENAI_API_KEY is unset; OpenAI() will then fail at call time.
key = os.getenv("OPENAI_API_KEY")
# Initialize the OpenAI client
client = OpenAI(
    api_key=key # Replace with your actual API key
)
| # Function to paraphrase sentences using GPT-4 | |
def generate_paraphrase(sentences, model="gpt-4o", num_paraphrases=10, max_tokens=150, temperature=0.7):
    """Paraphrase one or more sentences using the OpenAI chat API.

    Accepts either a single string or a list of strings. For each input
    sentence one chat-completion request is made asking for
    ``num_paraphrases`` alternative completions; every returned variant is
    stripped and flattened into one result list. A sentence whose request
    fails is reported on stdout and skipped (best-effort behavior).

    Returns the flat list of paraphrased strings (possibly empty).
    """
    # Normalize a bare string into a one-element list so the loop below
    # always iterates over sentences.
    inputs = [sentences] if isinstance(sentences, str) else sentences
    collected = []
    for text in inputs:
        prompt = f"Paraphrase the following text: '{text}'"
        try:
            completion = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=model,
                max_tokens=max_tokens,
                temperature=temperature,
                n=num_paraphrases,  # one API call yields this many variants
            )
            # Pull the text out of every returned choice.
            collected.extend(
                choice.message.content.strip() for choice in completion.choices
            )
        except Exception as e:
            # Best-effort: report the failure and continue with the rest.
            print(f"Error paraphrasing sentence '{text}': {e}")
    return collected
# Demo run: paraphrase one sample news sentence and report how many
# variants the API returned.
result = generate_paraphrase(
    "Mayor Eric Adams did not attend the first candidate forum for the New York City mayoral race, but his record — and the criminal charges he faces — received plenty of attention on Saturday from the Democrats who are running to unseat him."
)
print(len(result))