Anirudh Esthuri committed on
Commit 16ab50a · 1 Parent(s): 3a73f5d

Switch Gemini models to use Google API directly instead of AWS Bedrock

Files changed (3):
  1. llm.py +91 -80
  2. model_config.py +4 -13
  3. requirements.txt +1 -0
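
The switch replaces bedrock_runtime.invoke_model calls with the google-generativeai SDK. For orientation, a minimal sketch of the direct call pattern the new code adopts (the model ID and prompt here are illustrative, and GOOGLE_API_KEY must be set in the environment):

    import os
    import google.generativeai as genai

    # Configure the SDK with the key the new code reads from the environment.
    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

    # Single-turn generation against one of the newly listed model IDs.
    model = genai.GenerativeModel("gemini-2.5-flash")
    response = model.generate_content("Say hello in one sentence.")
    print(response.text)
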
llm.py CHANGED
@@ -8,6 +8,39 @@ import requests
 from dotenv import load_dotenv
 from model_config import MODEL_TO_PROVIDER, MODEL_TO_INFERENCE_PROFILE_ARN
 
+# Lazy initialization of Google Gemini client
+_google_client = None
+
+def get_google_client():
+    """Get or create the Google Gemini client with proper error handling."""
+    global _google_client
+    if _google_client is None:
+        try:
+            import google.generativeai as genai
+        except ImportError:
+            raise ValueError(
+                "google-generativeai package not installed. "
+                "Please add 'google-generativeai' to requirements.txt"
+            )
+
+        google_api_key = os.getenv("GOOGLE_API_KEY", "").strip()
+        if not google_api_key:
+            raise ValueError(
+                "Google API key not found. Please set GOOGLE_API_KEY "
+                "as a secret in Hugging Face Spaces settings."
+            )
+
+        try:
+            genai.configure(api_key=google_api_key)
+            _google_client = genai
+        except Exception as e:
+            raise ValueError(
+                f"Failed to initialize Google Gemini client: {str(e)}. "
+                "Please verify your GOOGLE_API_KEY is correct."
+            ) from e
+
+    return _google_client
+
 # ──────────────────────────────────────────────────────────────
 # Load environment variables
 load_dotenv()
@@ -177,101 +210,64 @@ def chat(messages, persona):
         print("Using google (Gemini): ", MODEL_STRING)
         t0 = time.time()
 
-        # Add system prompt for better behavior
-        system_prompt = ""
-
-        # Convert messages to Gemini format
-        # Gemini uses "user" and "model" roles, and content is an array
-        gemini_messages = []
-        for msg in messages:
-            role = msg.get("role", "user")
-            # Gemini uses "model" instead of "assistant"
-            if role == "assistant":
-                role = "model"
-            gemini_messages.append({
-                "role": role,
-                "parts": [{"text": msg["content"]}]
-            })
-
         try:
-            bedrock_runtime = get_bedrock_client()
+            genai = get_google_client()
 
-            # Use inference profile ARN if available (for provisioned throughput models)
-            # Otherwise use modelId (for on-demand models)
-            invoke_kwargs = {
-                "contentType": "application/json",
-                "accept": "application/json",
-                "body": json.dumps(
-                    {
-                        "contents": gemini_messages,
-                        "generationConfig": {
-                            "maxOutputTokens": 4000,
-                            "temperature": 0.3,
-                        }
-                    }
-                ),
-            }
+            # Get the model
+            model = genai.GenerativeModel(MODEL_STRING)
 
-            # Add system instruction if provided
-            if system_prompt:
-                invoke_kwargs["body"] = json.dumps(
-                    {
-                        "contents": gemini_messages,
-                        "systemInstruction": {
-                            "parts": [{"text": system_prompt}]
-                        },
-                        "generationConfig": {
-                            "maxOutputTokens": 4000,
-                            "temperature": 0.3,
-                        }
-                    }
-                )
+            # Convert messages to Gemini format
+            # Gemini API expects a chat history format
+            chat_history = []
+            for msg in messages:
+                role = msg.get("role", "user")
+                content = msg.get("content", "")
+                # Gemini uses "model" instead of "assistant"
+                if role == "assistant":
+                    role = "model"
+                chat_history.append({
+                    "role": role,
+                    "parts": [content]
+                })
 
-            # Check if this model has an inference profile ARN (provisioned throughput)
-            # For provisioned throughput, use the ARN as the modelId
-            if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
-                invoke_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]
-            else:
-                invoke_kwargs["modelId"] = MODEL_STRING
+            # Start a chat session with history
+            chat = model.start_chat(history=chat_history[:-1] if len(chat_history) > 1 else [])
 
-            response = bedrock_runtime.invoke_model(**invoke_kwargs)
+            # Send the last message
+            last_message = chat_history[-1]["parts"][0] if chat_history else ""
+            response = chat.send_message(
+                last_message,
+                generation_config=genai.types.GenerationConfig(
+                    max_output_tokens=4000,
+                    temperature=0.3,
+                )
+            )
 
             dt = time.time() - t0
-            body = json.loads(response["body"].read())
+            text = response.text.strip()
+
+            # Calculate tokens (approximate)
+            total_tok = len(text.split())
+
+            return text, dt, total_tok, (total_tok / dt if dt else total_tok)
         except ValueError as e:
             # Re-raise ValueError (credential errors) as-is
             raise
         except Exception as e:
             error_msg = str(e)
-            if "ValidationException" in error_msg and "model identifier is invalid" in error_msg:
+            if "API key" in error_msg or "invalid" in error_msg.lower() or "401" in error_msg or "403" in error_msg:
                 raise ValueError(
-                    f"Invalid Bedrock model ID: '{MODEL_STRING}'. "
-                    f"Error: {error_msg}. "
-                    "Please verify the model ID is correct and the model is available in your AWS region. "
-                    "Common Gemini model IDs: 'google.gemini-pro-v1' or 'google.gemini-2.0-flash-exp'"
+                    f"Google API authentication failed: {error_msg}. "
+                    "Please verify your GOOGLE_API_KEY secret is correct and has Gemini API access."
                 ) from e
-            elif "UnrecognizedClientException" in error_msg or "invalid" in error_msg.lower():
+            elif "not found" in error_msg.lower() or "404" in error_msg:
                 raise ValueError(
-                    f"AWS Bedrock authentication failed: {error_msg}. "
-                    "Please verify your AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY secrets "
-                    "are correct and have Bedrock access permissions."
+                    f"Invalid Gemini model ID: '{MODEL_STRING}'. "
+                    f"Error: {error_msg}. "
+                    "Please verify the model ID is correct. "
+                    "Common Gemini model IDs: 'gemini-3.0-pro', 'gemini-2.5-flash', 'gemini-1.5-pro', 'gemini-1.5-flash'"
                 ) from e
             raise
-
-        # Extract text from Gemini response
-        # Gemini response format: {"candidates": [{"content": {"parts": [{"text": "..."}]}}]}
-        text = ""
-        if "candidates" in body and len(body["candidates"]) > 0:
-            candidate = body["candidates"][0]
-            if "content" in candidate and "parts" in candidate["content"]:
-                for part in candidate["content"]["parts"]:
-                    if "text" in part:
-                        text += part["text"]
-
-        text = text.strip()
-        total_tok = len(text.split())
-
-        return text, dt, total_tok, (total_tok / dt if dt else total_tok)
     elif provider == "deepseek":
         print("Using deepseek: ", MODEL_STRING)
         t0 = time.time()
@@ -477,8 +473,8 @@ def check_credentials():
     # print(f"Ollama connection failed: {e}")
     # return False
 
-    # Check if using Bedrock providers (anthropic, google, meta, mistral, deepseek)
-    bedrock_providers = ["anthropic", "google"]
+    # Check if using Bedrock providers (anthropic, meta, mistral, deepseek)
+    bedrock_providers = ["anthropic"]
    if MODEL_TO_PROVIDER.get(MODEL_STRING) in bedrock_providers:
        # Test AWS Bedrock connection by trying to invoke a simple model
        try:
@@ -519,6 +515,21 @@ def check_credentials():
            return False
        return True
 
+    # For Google Gemini, check API key
+    if MODEL_TO_PROVIDER.get(MODEL_STRING) == "google":
+        required = ["GOOGLE_API_KEY"]
+        missing = [var for var in required if not os.getenv(var)]
+        if missing:
+            print(f"Missing environment variables: {missing}")
+            return False
+        # Try to initialize the client to verify the key works
+        try:
+            get_google_client()
+            return True
+        except Exception as e:
+            print(f"Google API client initialization failed: {e}")
+            return False
+
    return True
 
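The multi-turn flow in chat() above (start_chat with the prior history, then send_message for the final turn) can be exercised on its own. A sketch under the same assumptions as before (GOOGLE_API_KEY set; the model ID and prompts are illustrative):

    import os
    import google.generativeai as genai

    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    model = genai.GenerativeModel("gemini-2.5-flash")

    # Prior turns use the "user"/"model" roles that chat() converts to.
    history = [
        {"role": "user", "parts": ["What is 2 + 2?"]},
        {"role": "model", "parts": ["2 + 2 = 4."]},
    ]
    chat = model.start_chat(history=history)

    # The final message is sent separately, mirroring chat_history[:-1] above.
    response = chat.send_message(
        "Now double it.",
        generation_config=genai.types.GenerationConfig(
            max_output_tokens=4000,
            temperature=0.3,
        ),
    )
    print(response.text)
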
model_config.py CHANGED
@@ -11,8 +11,8 @@ PROVIDER_MODEL_MAP = {
         "anthropic.claude-opus-4-20250514-v1:0",
     ],
     "google": [
-        "google.gemini-3.0-pro-v1:0",
-        "google.gemini-2.5-flash-v1:0",
+        "gemini-3.0-pro",
+        "gemini-2.5-flash",
     ],
 }
 
@@ -32,8 +32,8 @@ MODEL_DISPLAY_NAMES = {
     "anthropic.claude-haiku-4-5-20251001-v1:0": "AWS Bedrock - Anthropic - Claude Haiku 4.5",
     "anthropic.claude-sonnet-4-5-20250929-v1:0": "AWS Bedrock - Anthropic - Claude Sonnet 4.5",
     "anthropic.claude-opus-4-20250514-v1:0": "AWS Bedrock - Anthropic - Claude Opus 4",
-    "google.gemini-3.0-pro-v1:0": "AWS Bedrock - Google - Gemini 3.0 Pro",
-    "google.gemini-2.5-flash-v1:0": "AWS Bedrock - Google - Gemini 2.5 Flash",
+    "gemini-3.0-pro": "Google - Gemini 3.0 Pro",
+    "gemini-2.5-flash": "Google - Gemini 2.5 Flash",
 }
 
 MODEL_CHOICES = [model for models in PROVIDER_MODEL_MAP.values() for model in models]
@@ -58,12 +58,3 @@ opus_arn = os.getenv("BEDROCK_OPUS_4_ARN", "").strip()
 if opus_arn:
     MODEL_TO_INFERENCE_PROFILE_ARN["anthropic.claude-opus-4-20250514-v1:0"] = opus_arn
 
-# Gemini 3.0 Pro
-gemini_3_arn = os.getenv("BEDROCK_GEMINI_3_ARN", "").strip()
-if gemini_3_arn:
-    MODEL_TO_INFERENCE_PROFILE_ARN["google.gemini-3.0-pro-v1:0"] = gemini_3_arn
-
-# Gemini 2.5 Flash
-gemini_2_5_arn = os.getenv("BEDROCK_GEMINI_2_5_ARN", "").strip()
-if gemini_2_5_arn:
-    MODEL_TO_INFERENCE_PROFILE_ARN["google.gemini-2.5-flash-v1:0"] = gemini_2_5_arn
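MODEL_TO_PROVIDER itself is not shown in this diff, but since llm.py dispatches on it, a derivation from PROVIDER_MODEL_MAP along these lines would make the new bare model IDs resolve to the "google" branch. This is a hedged sketch, not the file's actual code:

    # Hypothetical inversion of provider -> models into model -> provider.
    PROVIDER_MODEL_MAP = {
        "anthropic": ["anthropic.claude-opus-4-20250514-v1:0"],
        "google": ["gemini-3.0-pro", "gemini-2.5-flash"],
    }
    MODEL_TO_PROVIDER = {
        model: provider
        for provider, models in PROVIDER_MODEL_MAP.items()
        for model in models
    }
    assert MODEL_TO_PROVIDER.get("gemini-2.5-flash") == "google"
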
requirements.txt CHANGED
@@ -11,3 +11,4 @@ tiktoken
 pydantic
 boto3
 huggingface_hub
+google-generativeai
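
A quick way to confirm the new dependency resolves after pip install -r requirements.txt (a sketch; the distribution name comes from the line added above):

    from importlib.metadata import version

    import google.generativeai  # the import get_google_client() relies on
    print("google-generativeai", version("google-generativeai"))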