| # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| # # Function to Initialize the Model | |
| # def init_model(): | |
| # para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") | |
| # para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") | |
| # return para_tokenizer, para_model | |
| # # Function to Paraphrase the Text | |
| # def paraphrase(question, para_tokenizer, para_model, num_beams=10, num_beam_groups=10, num_return_sequences=10, repetition_penalty=10.0, diversity_penalty=3.0, no_repeat_ngram_size=2, temperature=0.7, max_length=64): | |
| # input_ids = para_tokenizer( | |
| # f'paraphrase: {question}', | |
| # return_tensors="pt", padding="longest", | |
| # max_length=max_length, | |
| # truncation=True, | |
| # ).input_ids | |
| # outputs = para_model.generate( | |
| # input_ids, temperature=temperature, repetition_penalty=repetition_penalty, | |
| # num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size, | |
| # num_beams=num_beams, num_beam_groups=num_beam_groups, | |
| # max_length=max_length, diversity_penalty=diversity_penalty | |
| # ) | |
| # res = para_tokenizer.batch_decode(outputs, skip_special_tokens=True) | |
| # return res | |
| # def generate_paraphrase(question): | |
| # para_tokenizer, para_model = init_model() | |
| # res = paraphrase(question, para_tokenizer, para_model) | |
| # return res | |
| # print(generate_paraphrase("Donald Trump said at a campaign rally event in Wilkes-Barre, Pennsylvania, that there has “never been a more dangerous time 5since the Holocaust” to be Jewish in the United States.")) | |
| ''' | |
| Accepts a sentence or list of sentences and returns a lit of all their paraphrases using GPT-4. | |
| ''' | |
from openai import OpenAI
from dotenv import load_dotenv
# Load variables from a local .env file so OPENAI_API_KEY is visible below.
load_dotenv()
import os
# May be None if OPENAI_API_KEY is unset; OpenAI() will then fail at call time.
key = os.getenv("OPENAI_API_KEY")
# Initialize the OpenAI client
client = OpenAI(
    api_key=key # Replace with your actual API key
)
| # Function to paraphrase sentences using GPT-4 | |
def generate_paraphrase(sentences, model="gpt-4o", num_paraphrases=10, max_tokens=150, temperature=0.7):
    """Paraphrase one or more sentences using the OpenAI chat API.

    Accepts either a single string or a list of strings. For each input
    sentence one chat-completion request is made asking for
    ``num_paraphrases`` alternative completions; every returned variant is
    stripped and flattened into one result list. A sentence whose request
    fails is reported on stdout and skipped (best-effort behavior).

    Returns the flat list of paraphrased strings (possibly empty).
    """
    # Normalize a bare string into a one-element list so the loop below
    # always iterates over sentences.
    inputs = [sentences] if isinstance(sentences, str) else sentences
    collected = []
    for text in inputs:
        prompt = f"Paraphrase the following text: '{text}'"
        try:
            completion = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=model,
                max_tokens=max_tokens,
                temperature=temperature,
                n=num_paraphrases,  # one API call yields this many variants
            )
            # Pull the text out of every returned choice.
            collected.extend(
                choice.message.content.strip() for choice in completion.choices
            )
        except Exception as e:
            # Best-effort: report the failure and continue with the rest.
            print(f"Error paraphrasing sentence '{text}': {e}")
    return collected
# Demo run: paraphrase one sample news sentence and report how many
# variants the API returned.
result = generate_paraphrase(
    "Mayor Eric Adams did not attend the first candidate forum for the New York City mayoral race, but his record — and the criminal charges he faces — received plenty of attention on Saturday from the Democrats who are running to unseat him."
)
print(len(result))