Anirudh Esthuri committed
Commit e91e2b4 · 1 Parent(s): bd7679d

Copy all files from Playground - app, gateway_client, llm, model_config, requirements, styles, assets, and config files

.gitignore ADDED
@@ -0,0 +1,53 @@
+ # macOS
+ .DS_Store
+ .AppleDouble
+ .LSOverride
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # Virtual environments
+ venv/
+ env/
+ ENV/
+ .venv
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # Environment variables
+ .env
+ .env.local
+
+ # Logs
+ *.log
+
+ # Cache
+ .cache/
+ .pytest_cache/
+ .mypy_cache/
+
.streamlit/config.toml ADDED
@@ -0,0 +1,4 @@
+ [server]
+ headless = true
+ enableCORS = false
+ enableXsrfProtection = false
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ RUN apt-get update && apt-get install -y git
+
+ COPY . .
+
+ RUN pip install --upgrade pip
+ RUN pip install -r requirements.txt
+
+ # HuggingFace sets $PORT, don't override it.
+
+ CMD ["bash", "-c", "echo Using PORT=$PORT && streamlit run app.py --server.address 0.0.0.0 --server.port $PORT"]
README.md CHANGED
@@ -1,12 +1,17 @@
---
title: MemMachine Playground
- emoji: 📊
- colorFrom: yellow
- colorTo: blue
+ emoji: 🧠
sdk: docker
+ app_port: 7860
pinned: false
- license: apache-2.0
- short_description: MemMachine-Playground
---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ # MemMachine Frontend Playground
+
+ This is a Streamlit-based UI for interacting with a remote MemMachine backend.
+
+ - Frontend: Streamlit (runs in this Space)
+ - Backend: MemMachine server running on EC2
+ - Memory + vector search: Neo4j + Postgres
+ - All requests route to your backend via `gateway_client.py`
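For orientation, here is a minimal sketch of the flow the README describes, run outside the Streamlit UI. It assumes MEMORY_SERVER_URL and MODEL_API_KEY are already set in the environment and that the MemMachine backend is reachable; the persona and prompt are illustrative, not part of this commit.

    from gateway_client import ingest_and_rewrite
    from llm import chat, set_model

    set_model("gpt-4.1-mini")  # default OpenAI model from model_config.py

    # 1) Route the raw message through the backend: ingest it and pull back profile/context
    rewritten = ingest_and_rewrite(
        user_id="Charlie", query="Plan a quick dinner for tonight", model_type="openai"
    )

    # 2) Send the context-enriched message to the selected model
    text, latency, tokens, tps = chat([{"role": "user", "content": rewritten}], "Charlie")
    print(text, f"({latency:.1f}s, {tokens} tokens, {tps:.1f} tok/s)")
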
app.py ADDED
@@ -0,0 +1,193 @@
+ import os
+ from typing import cast
+
+ import streamlit as st
+ from gateway_client import delete_profile, ingest_and_rewrite
+ from llm import chat, set_model
+ from model_config import MODEL_CHOICES, MODEL_TO_PROVIDER
+
+
+ def rewrite_message(
+     msg: str, persona_name: str, show_rationale: bool, skip_rewrite: bool
+ ) -> str:
+     if skip_rewrite:
+         rewritten_msg = msg
+         if show_rationale:
+             rewritten_msg += " At the beginning of your response, please say the following in ITALIC: 'Persona Rationale: No personalization applied.'. Begin your answer on the next line."
+     else:
+         try:
+             rewritten_msg = ingest_and_rewrite(
+                 user_id=persona_name, query=msg, model_type=provider
+             )
+             if show_rationale:
+                 rewritten_msg += " At the beginning of your response, please say the following in ITALIC: 'Persona Rationale: ' followed by 1 sentence, also in italics, explaining how the persona traits influenced this response. Begin your answer on the next line."
+
+         except Exception as e:
+             # If the backend is unavailable, use the original message without rewriting
+             st.warning(f"Backend memory server unavailable. Using message without personalization: {e}")
+             rewritten_msg = msg
+             if show_rationale:
+                 rewritten_msg += " At the beginning of your response, please say the following in ITALIC: 'Persona Rationale: No personalization applied (backend unavailable).'. Begin your answer on the next line."
+     return rewritten_msg
+
+
+ # ──────────────────────────────────────────────────────────────
+ # Page setup & CSS
+ # ──────────────────────────────────────────────────────────────
+ st.set_page_config(page_title="MemMachine Chatbot", layout="wide")
+ with open("./styles.css") as f:
+     st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
+
+
+ # ──────────────────────────────────────────────────────────────
+ # Sidebar
+ # ──────────────────────────────────────────────────────────────
+ with st.sidebar:
+     st.image("./assets/memmachine_logo.png", use_container_width=True)
+
+     st.markdown("#### Choose Model")
+
+     model_id = st.selectbox(
+         "Choose Model", MODEL_CHOICES, index=0, label_visibility="collapsed"
+     )
+     provider = MODEL_TO_PROVIDER[model_id]
+     set_model(model_id)
+
+     st.markdown("#### Choose user persona")
+     selected_persona = st.selectbox(
+         "Choose user persona",
+         ["Charlie", "Jing", "Charles", "Control"],
+         label_visibility="collapsed",
+     )
+     custom_persona = st.text_input("Or enter your name", "")
+     persona_name = (
+         custom_persona.strip() if custom_persona.strip() else selected_persona
+     )
+
+     skip_rewrite = st.checkbox("Skip Rewrite")
+     compare_personas = st.checkbox("Compare with Control persona")
+     show_rationale = st.checkbox("Show Persona Rationale")
+
+     st.divider()
+     if st.button("Clear chat", use_container_width=True):
+         st.session_state.history = []
+         st.rerun()
+     if st.button("Delete Profile", use_container_width=True):
+         success = delete_profile(persona_name)
+         st.session_state.history = []
+         if success:
+             st.success(f"Profile for '{persona_name}' deleted.")
+         else:
+             st.error(f"Failed to delete profile for '{persona_name}'.")
+     st.divider()
+
+ # ──────────────────────────────────────────────────────────────
+ # Session state
+ # ──────────────────────────────────────────────────────────────
+ if "history" not in st.session_state:
+     st.session_state.history = cast(list[dict], [])
+
+
+ # ──────────────────────────────────────────────────────────────
+ # Enforce alternating roles
+ # ──────────────────────────────────────────────────────────────
+ def clean_history(history: list[dict], persona: str) -> list[dict]:
+     out = []
+     for turn in history:
+         if turn.get("role") == "user":
+             out.append({"role": "user", "content": turn["content"]})
+         elif turn.get("role") == "assistant" and turn.get("persona") == persona:
+             out.append({"role": "assistant", "content": turn["content"]})
+     cleaned = []
+     last_role = None
+     for msg in out:
+         if msg["role"] != last_role:
+             cleaned.append(msg)
+             last_role = msg["role"]
+     return cleaned
+
+
+ def append_user_turn(msgs: list[dict], new_user_msg: str) -> list[dict]:
+     if msgs and msgs[-1]["role"] == "user":
+         msgs[-1] = {"role": "user", "content": new_user_msg}
+     else:
+         msgs.append({"role": "user", "content": new_user_msg})
+     return msgs
+
+
+ # ──────────────────────────────────────────────────────────────
+ # Title
+ # ──────────────────────────────────────────────────────────────
+ st.title("MemMachine Chatbot")
+
+ # ──────────────────────────────────────────────────────────────
+ # Chat logic
+ # ──────────────────────────────────────────────────────────────
+ msg = st.chat_input("Type your message…")
+ if msg:
+     st.session_state.history.append({"role": "user", "content": msg})
+     # rewritten_msg = "Use the persona profile to personalize your answer only when applicable.\n"
+     if compare_personas:
+         all_answers = {}
+         rewritten_msg = rewrite_message(msg, persona_name, show_rationale, False)
+         msgs = clean_history(st.session_state.history, persona_name)
+         msgs = append_user_turn(msgs, rewritten_msg)
+         txt, lat, tok, tps = chat(msgs, persona_name)
+         all_answers[persona_name] = txt
+
+         rewritten_msg_control = rewrite_message(msg, "Control", show_rationale, True)
+         msgs_control = clean_history(st.session_state.history, "Control")
+         msgs_control = append_user_turn(msgs_control, rewritten_msg_control)
+         txt_control, lat, tok, tps = chat(msgs_control, "Arnold")
+         all_answers["Control"] = txt_control
+
+         st.session_state.history.append(
+             {"role": "assistant_all", "axis": "role", "content": all_answers}
+         )
+     else:
+         rewritten_msg = rewrite_message(msg, persona_name, show_rationale, skip_rewrite)
+         msgs = clean_history(st.session_state.history, persona_name)
+         msgs = append_user_turn(msgs, rewritten_msg)
+         txt, lat, tok, tps = chat(
+             msgs, "Arnold" if persona_name == "Control" else persona_name
+         )
+         st.session_state.history.append(
+             {"role": "assistant", "persona": persona_name, "content": txt}
+         )
+
+ # ──────────────────────────────────────────────────────────────
+ # Chat history display
+ # ──────────────────────────────────────────────────────────────
+ for turn in st.session_state.history:
+     if turn.get("role") == "user":
+         st.chat_message("user").write(turn["content"])
+     elif turn.get("role") == "assistant":
+         st.chat_message("assistant").write(turn["content"])
+     elif turn.get("role") == "assistant_all":
+         content_items = list(turn["content"].items())
+         if len(content_items) >= 2:
+             cols = st.columns([1, 0.03, 1])
+             persona_label, persona_response = content_items[0]
+             control_label, control_response = content_items[1]
+             with cols[0]:
+                 st.markdown(f"**{persona_label}**")
+                 st.markdown(
+                     f'<div class="answer">{persona_response}</div>',
+                     unsafe_allow_html=True,
+                 )
+             with cols[1]:
+                 st.markdown(
+                     '<div class="vertical-divider"></div>', unsafe_allow_html=True
+                 )
+             with cols[2]:
+                 st.markdown(f"**{control_label}**")
+                 st.markdown(
+                     f'<div class="answer">{control_response}</div>',
+                     unsafe_allow_html=True,
+                 )
+         else:
+             for label, response in content_items:
+                 st.markdown(f"**{label}**")
+                 st.markdown(
+                     f'<div class="answer">{response}</div>', unsafe_allow_html=True
+                 )
assets/memmachine_logo.png ADDED
assets/memverge_logo.png ADDED
gateway_client.py ADDED
@@ -0,0 +1,149 @@
+ import os
+ from datetime import datetime
+
+ import requests
+
+ # Backend server URL - can be set via environment variable
+ # For Hugging Face Spaces: Set MEMORY_SERVER_URL in Space settings (Repository secrets)
+ # For local development: Set MEMORY_SERVER_URL in your .env file
+ # Example: http://3.232.95.65:8080 (MemMachine backend)
+ EXAMPLE_SERVER_PORT = os.getenv("MEMORY_SERVER_URL")
+
+
+
+ def ingest_and_rewrite(user_id: str, query: str, model_type: str = "openai") -> str:
+     """Pass a raw user message through the memory server and get a context-aware query."""
+     print("entered ingest_and_rewrite")
+
+     # First, store the message in memory
+     session_data = {
+         "group_id": user_id,
+         "agent_id": ["assistant"],
+         "user_id": [user_id],
+         "session_id": f"session_{user_id}",
+     }
+     episode_data = {
+         "session": session_data,
+         "producer": user_id,
+         "produced_for": "assistant",
+         "episode_content": query,
+         "episode_type": "message",
+         "metadata": {
+             "speaker": user_id,
+             "timestamp": datetime.now().isoformat(),
+             "type": "message",
+         },
+     }
+
+     # Store the episode
+     store_resp = requests.post(
+         f"{EXAMPLE_SERVER_PORT}/memory",
+         json=episode_data,
+         timeout=1000,
+     )
+     store_resp.raise_for_status()
+
+     # Then search for relevant context
+     search_data = {
+         "session": session_data,
+         "query": query,
+         "limit": 5,
+         "filter": {"producer_id": user_id},
+     }
+
+     search_resp = requests.post(
+         f"{EXAMPLE_SERVER_PORT}/memory/search",
+         json=search_data,
+         timeout=1000,
+     )
+     search_resp.raise_for_status()
+
+     search_results = search_resp.json()
+     content = search_results.get("content", {})
+     episodic_memory = content.get("episodic_memory", [])
+     profile_memory = content.get("profile_memory", [])
+
+     # Format the response similar to example_server.py
+     if profile_memory and episodic_memory:
+         profile_str = "\n".join([str(p) for p in profile_memory]) if isinstance(profile_memory, list) else str(profile_memory)
+         context_str = "\n".join([str(c) for c in episodic_memory]) if isinstance(episodic_memory, list) else str(episodic_memory)
+         return f"Profile: {profile_str}\n\nContext: {context_str}\n\nQuery: {query}"
+     elif profile_memory:
+         profile_str = "\n".join([str(p) for p in profile_memory]) if isinstance(profile_memory, list) else str(profile_memory)
+         return f"Profile: {profile_str}\n\nQuery: {query}"
+     elif episodic_memory:
+         context_str = "\n".join([str(c) for c in episodic_memory]) if isinstance(episodic_memory, list) else str(episodic_memory)
+         return f"Context: {context_str}\n\nQuery: {query}"
+     else:
+         return f"Message ingested successfully. No relevant context found yet.\n\nQuery: {query}"
+
+
+ def add_session_message(user_id: str, msg: str) -> None:
+     """Add a raw message into memory via the memory server."""
+     session_data = {
+         "group_id": user_id,
+         "agent_id": ["assistant"],
+         "user_id": [user_id],
+         "session_id": f"session_{user_id}",
+     }
+     episode_data = {
+         "session": session_data,
+         "producer": user_id,
+         "produced_for": "assistant",
+         "episode_content": msg,
+         "episode_type": "message",
+         "metadata": {
+             "speaker": user_id,
+             "timestamp": datetime.now().isoformat(),
+             "type": "message",
+         },
+     }
+     requests.post(
+         f"{EXAMPLE_SERVER_PORT}/memory",
+         json=episode_data,
+         timeout=5,
+     )
+
+
+ def create_persona_query(user_id: str, query: str) -> str:
+     """Create a persona-aware query by searching memory context via the memory server."""
+     session_data = {
+         "group_id": user_id,
+         "agent_id": ["assistant"],
+         "user_id": [user_id],
+         "session_id": f"session_{user_id}",
+     }
+     search_data = {
+         "session": session_data,
+         "query": query,
+         "limit": 5,
+         "filter": {"producer_id": user_id},
+     }
+
+     resp = requests.post(
+         f"{EXAMPLE_SERVER_PORT}/memory/search",
+         json=search_data,
+         timeout=1000,
+     )
+     resp.raise_for_status()
+
+     search_results = resp.json()
+     content = search_results.get("content", {})
+     profile_memory = content.get("profile_memory", [])
+
+     if profile_memory:
+         profile_str = "\n".join([str(p) for p in profile_memory]) if isinstance(profile_memory, list) else str(profile_memory)
+         return f"Based on your profile: {profile_str}\n\nQuery: {query}"
+     else:
+         return f"Query: {query}"
+
+
+ def add_new_session_message(user_id: str, msg: str) -> None:
+     """Alias for add_session_message for backward compatibility."""
+     add_session_message(user_id, msg)
+
+
+ def delete_profile(user_id: str) -> bool:
+     """Delete all memory for the given user_id via the CRM server."""
+     # NOT IMPLEMENTED
+     return False
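As a quick sanity check, the client above can be exercised directly from a Python shell. This is a sketch only: it assumes a MemMachine server is reachable at http://localhost:8080, and the URL and persona are illustrative. Note that gateway_client.py reads MEMORY_SERVER_URL at import time, so it must be set before the import.

    import os

    # Must be set before importing gateway_client, which reads it at module load.
    os.environ.setdefault("MEMORY_SERVER_URL", "http://localhost:8080")

    from gateway_client import create_persona_query, ingest_and_rewrite

    # Stores the message as an episode, then searches for profile/episodic context.
    print(ingest_and_rewrite(user_id="Charlie", query="What should I cook tonight?"))

    # Search-only variant: prefixes the query with any stored profile facts.
    print(create_persona_query(user_id="Charlie", query="Any dinner ideas?"))
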
llm.py ADDED
@@ -0,0 +1,220 @@
+ import json
+ import os
+ import time
+
+ import boto3
+ import openai
+ from dotenv import load_dotenv
+ from model_config import MODEL_TO_PROVIDER
+
+ # ──────────────────────────────────────────────────────────────
+ # Load environment variables
+ load_dotenv()
+ # ──────────────────────────────────────────────────────────────
+
+ # ──────────────────────────────────────────────────────────────
+ # Configuration
+ # ──────────────────────────────────────────────────────────────
+ MODEL_STRING = "gpt-4.1-mini"  # default model
+ api_key = os.getenv("MODEL_API_KEY")
+ client = openai.OpenAI(api_key=api_key)
+ bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-west-2")
+
+
+ # ──────────────────────────────────────────────────────────────
+ # Model switcher
+ # ──────────────────────────────────────────────────────────────
+ def set_model(model_id: str) -> None:
+     global MODEL_STRING
+     MODEL_STRING = model_id
+     print(f"Model changed to: {model_id}")
+
+
+ def set_provider(provider: str) -> None:
+     global PROVIDER
+     PROVIDER = provider
+
+
+ # ──────────────────────────────────────────────────────────────
+ # High-level Chat wrapper
+ # ──────────────────────────────────────────────────────────────
+ def chat(messages, persona):
+     provider = MODEL_TO_PROVIDER[MODEL_STRING]
+
+     if provider == "openai":
+         print("Using openai: ", MODEL_STRING)
+         system_prompt = None
+         if messages and messages[0].get("role") == "system":
+             system_prompt = messages[0]["content"]
+             messages = messages[1:]
+
+         t0 = time.time()
+         out = client.responses.create(
+             model=MODEL_STRING,
+             instructions=system_prompt,
+             input=messages,  # messages=messages
+             max_output_tokens=500,  # max_tokens=500,
+             temperature=0.5,
+             store=False,  # keeps call stateless
+         )
+
+         dt = time.time() - t0
+
+         text = out.output_text.strip()  # out.choices[0].message.content.strip()
+
+         tok_out = out.usage.output_tokens
+         tok_in = out.usage.input_tokens
+         total_tok = (
+             tok_out + tok_in
+             if tok_out is not None and tok_in is not None
+             else len(text.split())
+         )
+
+         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+     elif provider == "anthropic":
+         print("Using anthropic: ", MODEL_STRING)
+         t0 = time.time()
+
+         claude_messages = [
+             {"role": m["role"], "content": m["content"]} for m in messages
+         ]
+
+         response = bedrock_runtime.invoke_model(
+             modelId=MODEL_STRING,
+             contentType="application/json",
+             accept="application/json",
+             body=json.dumps(
+                 {
+                     "anthropic_version": "bedrock-2023-05-31",
+                     "messages": claude_messages,
+                     "max_tokens": 500,
+                     "temperature": 0.5,
+                 }
+             ),
+         )
+
+         dt = time.time() - t0
+         body = json.loads(response["body"].read())
+
+         text = "".join(
+             part["text"] for part in body["content"] if part["type"] == "text"
+         ).strip()
+         total_tok = len(text.split())
+
+         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+     elif provider == "deepseek":
+         print("Using deepseek: ", MODEL_STRING)
+         t0 = time.time()
+
+         prompt = messages[-1]["content"]
+
+         formatted_prompt = (
+             f"<|begin▁of▁sentence|><|User|>{prompt}<|Assistant|><think>\n"
+         )
+
+         response = bedrock_runtime.invoke_model(
+             modelId=MODEL_STRING,
+             contentType="application/json",
+             accept="application/json",
+             body=json.dumps(
+                 {
+                     "prompt": formatted_prompt,
+                     "max_tokens": 500,
+                     "temperature": 0.5,
+                     "top_p": 0.9,
+                 }
+             ),
+         )
+
+         dt = time.time() - t0
+         body = json.loads(response["body"].read())
+
+         text = body["choices"][0]["text"].strip()
+         total_tok = len(text.split())
+
+         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+     elif provider == "meta":
+         print("Using meta (LLaMA): ", MODEL_STRING)
+         t0 = time.time()
+
+         prompt = messages[-1]["content"]
+
+         # Format prompt in LLaMA-style instruction format
+         formatted_prompt = (
+             "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
+             + prompt.strip()
+             + "\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
+         )
+
+         response = bedrock_runtime.invoke_model(
+             modelId=MODEL_STRING,
+             contentType="application/json",
+             accept="application/json",
+             body=json.dumps(
+                 {"prompt": formatted_prompt, "max_gen_len": 512, "temperature": 0.5}
+             ),
+         )
+
+         dt = time.time() - t0
+         body = json.loads(response["body"].read())
+         text = body.get("generation", "").strip()
+         total_tok = len(text.split())
+
+         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+     elif provider == "mistral":
+         print("Using mistral: ", MODEL_STRING)
+         t0 = time.time()
+
+         prompt = messages[-1]["content"]
+         formatted_prompt = f"<s>[INST] {prompt} [/INST]"
+
+         response = bedrock_runtime.invoke_model(
+             modelId=MODEL_STRING,
+             contentType="application/json",
+             accept="application/json",
+             body=json.dumps(
+                 {"prompt": formatted_prompt, "max_tokens": 512, "temperature": 0.5}
+             ),
+         )
+
+         dt = time.time() - t0
+         body = json.loads(response["body"].read())
+
+         text = body["outputs"][0]["text"].strip()
+         total_tok = len(text.split())
+
+         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+
+
+ # ──────────────────────────────────────────────────────────────
+ # Diagnostics / CLI test
+ # ──────────────────────────────────────────────────────────────
+ def check_credentials():
+     required = ["MODEL_API_KEY"]
+     missing = [var for var in required if not os.getenv(var)]
+     if missing:
+         print(f"Missing environment variables: {missing}")
+         return False
+     return True
+
+
+ def test_chat():
+     print("Testing chat...")
+     try:
+         test_messages = [
+             {
+                 "role": "user",
+                 "content": "Hello! Please respond with just 'Test successful'.",
+             }
+         ]
+         text, latency, tokens, tps = chat(test_messages, "Control")
+         print(f"Test passed! {text} {latency:.2f}s {tokens} ⚡ {tps:.1f} tps")
+     except Exception as e:
+         print(f"Test failed: {e}")
+
+
+ if __name__ == "__main__":
+     print("running diagnostics")
+     if check_credentials():
+         test_chat()
+     print("\nDone.")
model_config.py ADDED
@@ -0,0 +1,22 @@
+ PROVIDER_MODEL_MAP = {
+     "openai": ["gpt-4.1-mini"],
+     "anthropic": [
+         "anthropic.claude-3-sonnet-20240229-v1:0",
+         "anthropic.claude-3-5-haiku-20241022-v1:0",
+     ],
+     "deepseek": ["us.deepseek.r1-v1:0"],
+     "meta": ["meta.llama3-8b-instruct-v1:0", "meta.llama3-70b-instruct-v1:0"],
+     "mistral": [
+         "mistral.mixtral-8x7b-instruct-v0:1",
+         "mistral.mistral-7b-instruct-v0:2",
+     ],
+ }
+ # "meta.llama4-maverick-17b-instruct-v1:0" (not currently working)
+
+ MODEL_TO_PROVIDER = {
+     model: provider
+     for provider, models in PROVIDER_MODEL_MAP.items()
+     for model in models
+ }
+
+ MODEL_CHOICES = [model for models in PROVIDER_MODEL_MAP.values() for model in models]
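A quick illustration of the lookups this module derives from PROVIDER_MODEL_MAP; the values come straight from the map above.

    from model_config import MODEL_CHOICES, MODEL_TO_PROVIDER

    assert MODEL_TO_PROVIDER["gpt-4.1-mini"] == "openai"
    assert MODEL_TO_PROVIDER["us.deepseek.r1-v1:0"] == "deepseek"

    # The first entry is what app.py shows as the default selectbox choice (index=0).
    print(MODEL_CHOICES[0])  # gpt-4.1-mini
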
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ altair
+ pandas
+ streamlit
+ requests
+ python-dotenv
+ websocket-client
+ openai
+ anthropic
+ tiktoken
+ pydantic
+ boto3
styles.css ADDED
@@ -0,0 +1,26 @@
+ /* -- Sidebar width & padding -- */
+ section[data-testid="stSidebar"] { width: 230px !important; }
+ section[data-testid="stSidebarContent"] { width: 230px !important;
+     padding: 0.75rem; }
+
+ /* -- Title size -- */
+ h1 { font-size: 2.1rem !important; margin-bottom: 1rem; }
+
+ /* -- Ensure long links wrap inside comparison columns -- */
+ div.answer { white-space: pre-wrap; overflow-wrap: anywhere; }
+
+ /* Tighten spacing between comparison columns */
+ div[data-testid="column"] {
+     padding-left: 0.25rem !important;
+     padding-right: 0.25rem !important;
+     margin-left: 0 !important;
+     margin-right: 0 !important;
+     flex-grow: 1;
+ }
+
+ /* Align vertical divider better */
+ .vertical-divider {
+     height: 100%;
+     border-left: 1px solid #ccc;
+     margin: 0 0.4rem;
+ }