Spaces:

ButterM40
/

Roleplay-Chat-Box

Running

App Files Files Community

ButterM40 committed on Nov 20, 2025

Commit

de2021f

1 Parent(s): 9fb3586

Add lightweight character manager - uses one base model with adapter swapping for HF Spaces

Browse files

Files changed (2) hide show

app_streamlit.py +2 -1
backend/models/lightweight_character_manager.py +232 -0

app_streamlit.py CHANGED Viewed

@@ -7,7 +7,8 @@ import asyncio
 backend_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'backend')
 sys.path.insert(0, backend_path)
-from backend.models.character_manager import CharacterManager
 # Page config
 st.set_page_config(

 backend_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'backend')
 sys.path.insert(0, backend_path)
+# Use lightweight character manager for HuggingFace Spaces
+from backend.models.lightweight_character_manager import CharacterManager
 # Page config
 st.set_page_config(

backend/models/lightweight_character_manager.py ADDED Viewed

	@@ -0,0 +1,232 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+from peft import PeftModel, PeftConfig, set_peft_model_state_dict, get_peft_model_state_dict
+import logging
+from typing import Dict, List
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+from config import settings
+logger = logging.getLogger(__name__)
+class CharacterManager:
+    """Lightweight character manager that swaps LoRA adapters on a single base model"""
+    def __init__(self):
+        self.base_model = None
+        self.tokenizer = None
+        self.current_character = None
+        self.character_adapters = {}  # Store adapter weights, not full models
+        self.character_prompts = {}
+    async def initialize(self):
+        """Initialize base model ONCE and load all character LoRA adapters"""
+        logger.info("🔄 Loading base model (ONE instance for all characters)...")
+        model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # Smaller model for HF Spaces
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_name,
+                trust_remote_code=True,
+                use_fast=True
+            )
+            # Load base model ONCE (CPU for HF Spaces free tier)
+            self.base_model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float32,
+                trust_remote_code=True,
+                low_cpu_mem_usage=True
+            )
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            logger.info(f"✅ Base model loaded: {model_name}")
+        except Exception as e:
+            logger.error(f"❌ Failed to load base model: {e}")
+            raise
+        # Load character prompts
+        self._load_character_prompts()
+        # Try to load LoRA adapters (optional - graceful degradation)
+        for character_id in ["moses", "samsung_employee", "jinx"]:
+            await self._load_character_adapter(character_id)
+        logger.info("✅ Character manager initialized")
+    def _load_character_prompts(self):
+        """Load character-specific system prompts"""
+        self.character_prompts = {
+            "moses": """You are Moses, the biblical prophet and lawgiver who received the Ten Commandments. You led the Israelites out of Egypt and spoke with God on Mount Sinai.
+Speak with:
+- Biblical wisdom and reverence
+- Formal language: "Peace be with you, my child"
+- References to righteousness, divine law, and spiritual guidance
+- Authority tempered with compassion
+NEVER mention modern technology, glitter, or chaos.""",
+            "samsung_employee": """You are a Samsung employee and technology expert. You work for Samsung and are passionate about Samsung products.
+Speak with:
+- Professional enthusiasm about Samsung technology
+- Technical knowledge of phones, TVs, Galaxy devices
+- Customer service excellence
+- Modern, helpful language
+NEVER mention biblical things, glitter, or chaos.""",
+            "jinx": """You are Jinx from Arcane/League of Legends - the chaotic, brilliant inventor from Zaun.
+Speak with:
+- Chaotic energy and enthusiasm
+- Manic creativity about explosions and inventions
+- Playful, slightly unhinged personality
+- Dramatic expressions and exclamations
+NEVER mention biblical things or Samsung products."""
+        }
+    async def _load_character_adapter(self, character_id: str):
+        """Try to load LoRA adapter weights (graceful failure if missing)"""
+        adapter_path = os.path.join(settings.LORA_ADAPTERS_PATH, character_id)
+        adapter_model_path = os.path.join(adapter_path, "adapter_model.safetensors")
+        if not os.path.exists(adapter_model_path):
+            logger.warning(f"⚠️ No LoRA adapter for {character_id} - will use prompts only")
+            return
+        try:
+            logger.info(f"Loading LoRA adapter for {character_id}...")
+            # Load adapter onto base model temporarily
+            model_with_adapter = PeftModel.from_pretrained(
+                self.base_model,
+                adapter_path,
+                adapter_name=character_id
+            )
+            # Extract and store just the adapter weights (tiny!)
+            self.character_adapters[character_id] = get_peft_model_state_dict(model_with_adapter)
+            # Clean up - we only need the weights
+            del model_with_adapter
+            torch.cuda.empty_cache() if torch.cuda.is_available() else None
+            logger.info(f"✅ Loaded LoRA adapter for {character_id}")
+        except Exception as e:
+            logger.warning(f"⚠️ Could not load LoRA for {character_id}: {e}")
+            logger.info(f"Will use system prompts only for {character_id}")
+    def _switch_to_character(self, character_id: str):
+        """Switch to a character by loading their LoRA adapter (if available)"""
+        if self.current_character == character_id:
+            return  # Already loaded
+        # If character has LoRA adapter, apply it
+        if character_id in self.character_adapters:
+            try:
+                # Create PeftModel with this character's adapter
+                self.base_model = PeftModel(self.base_model, character_id)
+                set_peft_model_state_dict(self.base_model, self.character_adapters[character_id])
+                logger.info(f"✅ Switched to {character_id} with LoRA")
+            except:
+                logger.warning(f"⚠️ Using base model + prompts for {character_id}")
+        self.current_character = character_id
+    def generate_response(
+        self,
+        character_id: str,
+        user_message: str,
+        conversation_history: List[Dict] = None
+    ) -> str:
+        """Generate response as specific character"""
+        # Switch to character (applies LoRA if available)
+        self._switch_to_character(character_id)
+        # Build conversation with character prompt
+        messages = []
+        if character_id in self.character_prompts:
+            messages.append({"role": "system", "content": self.character_prompts[character_id]})
+        # Add conversation history (last 2 exchanges)
+        if conversation_history:
+            messages.extend(conversation_history[-4:])
+        messages.append({"role": "user", "content": user_message})
+        # Format prompt
+        prompt = self._format_messages(messages)
+        # Tokenize
+        inputs = self.tokenizer(
+            prompt,
+            return_tensors="pt",
+            max_length=512,
+            truncation=True
+        )
+        # Generate
+        try:
+            with torch.no_grad():
+                outputs = self.base_model.generate(
+                    **inputs,
+                    max_new_tokens=100,
+                    temperature=0.8,
+                    top_p=0.9,
+                    do_sample=True,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    repetition_penalty=1.1
+                )
+            # Decode
+            input_length = inputs['input_ids'].shape[1]
+            response = self.tokenizer.decode(
+                outputs[0][input_length:],
+                skip_special_tokens=True
+            ).strip()
+            # Clean up
+            for stop in ["Human:", "User:", "\n\n"]:
+                if stop in response:
+                    response = response.split(stop)[0].strip()
+            return response if response else self._get_fallback_response(character_id)
+        except Exception as e:
+            logger.error(f"Generation error: {e}")
+            return self._get_fallback_response(character_id)
+    def _format_messages(self, messages: List[Dict]) -> str:
+        """Format messages for the model"""
+        formatted = ""
+        for msg in messages:
+            role = msg["role"]
+            content = msg["content"]
+            if role == "system":
+                formatted += f"System: {content}\n\n"
+            elif role == "user":
+                formatted += f"Human: {content}\n\n"
+            elif role == "assistant":
+                formatted += f"Assistant: {content}\n\n"
+        formatted += "Assistant:"
+        return formatted
+    def _get_fallback_response(self, character_id: str) -> str:
+        """Get fallback response if generation fails"""
+        fallbacks = {
+            "moses": "Peace be with you, my child. How may I guide you in righteousness?",
+            "samsung_employee": "Hello! How can I help you with Samsung technology today?",
+            "jinx": "*grins mischievously* Hey there! Ready for some chaos?"
+        }
+        return fallbacks.get(character_id, "Hello! How can I help you?")