import os

from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables (e.g. the API key) from a local .env file
load_dotenv()


class Colors:
    """ANSI color codes for terminal output formatting."""
    GREEN = "\033[0;32m"
    RED = "\033[0;31m"
    BLUE = "\033[0;34m"
    GRAY = "\033[0;90m"
    BOLD = "\033[1m"
    END = "\033[0m"
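

# Illustrative example (not from the original file): wrapping text in these
# codes colors it in ANSI-capable terminals, e.g.
#     print(f"{Colors.RED}Failed{Colors.END}")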


def ask_ai(
    prompt,
    system_prompt,
    temperature=0.7,
    max_tokens=None,
    stream=True,
    verbose=False,
):
    """
    Send a prompt to the AI model and return its response.

    Args:
        prompt (str): The user prompt to send to the AI
        system_prompt (str): The system instructions for the AI
        temperature (float): Controls randomness (0.0-1.0)
        max_tokens (int): Maximum tokens in the response
        stream (bool): Whether to stream the response
        verbose (bool): Whether to print status messages

    Returns:
        str: The AI's response text
    """
    # Create an OpenAI client pointed at the Modal-deployed endpoint
    API_KEY = os.getenv("Modal_API_KEY")
    client = OpenAI(
        api_key=API_KEY,
        base_url="https://abhinav77642--llama-3-1-8b-instruct-serve.modal.run/v1",
    )
    # Set up the messages for the conversation
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    # Set up the completion parameters
    completion_args = {
        "model": "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "stream": stream,
    }

    # Drop unset parameters so the API falls back to its defaults
    completion_args = {k: v for k, v in completion_args.items() if v is not None}
    try:
        if verbose:
            print(f"{Colors.GRAY}Sending request to the model...{Colors.END}")
        response = client.chat.completions.create(**completion_args)

        # Handle the response based on streaming or non-streaming mode
        if stream:
            # Accumulate the streamed chunks into a single string
            result = ""
            for chunk in response:
                if chunk.choices and chunk.choices[0].delta.content:
                    result += chunk.choices[0].delta.content
            return result
        else:
            return response.choices[0].message.content
    except Exception as e:
        return f"Error during API call: {e}"