Upload folder using huggingface_hub

Changed files:
- Dockerfile: +4 -0
- main.py: +62 -268
- static/index.html: +115 -0
Dockerfile CHANGED

@@ -25,6 +25,10 @@ RUN pip install --no-cache-dir fastapi==0.115.0 uvicorn[standard]==0.30.6 \
 # Copy application code
 COPY . .
 
+# Create static directory
+RUN mkdir -p /app/static
+COPY static/ /app/static/
+
 # Create non-root user for security
 RUN useradd -m -u 1000 user
 USER user
main.py CHANGED

@@ -4,7 +4,8 @@ Full compatibility with Anthropic Messages API + Interleaved Thinking
 Optimized for: 2 vCPU, 16GB RAM
 """
 from fastapi import FastAPI, HTTPException, Header, Request
-from fastapi.responses import StreamingResponse
+from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse
+from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field
 from typing import Optional, List, Union, Literal, Any, Dict
 import torch
@@ -52,10 +53,12 @@ async def lifespan(app: FastAPI):
 
 
 app = FastAPI(
-    title="
+    title="Model Runner",
     description="Anthropic API Compatible with Interleaved Thinking",
     version="1.0.0",
-    lifespan=lifespan
+    lifespan=lifespan,
+    docs_url="/api/docs",
+    redoc_url="/api/redoc"
 )
 
 
@@ -143,15 +146,15 @@ class AnthropicRequest(BaseModel):
     max_tokens: int = 1024
     temperature: Optional[float] = Field(default=1.0, gt=0.0, le=1.0)
     top_p: Optional[float] = Field(default=1.0, gt=0.0, le=1.0)
-    top_k: Optional[int] = None
-    stop_sequences: Optional[List[str]] = None
+    top_k: Optional[int] = None
+    stop_sequences: Optional[List[str]] = None
     stream: Optional[bool] = False
     system: Optional[Union[str, List[TextBlock]]] = None
     tools: Optional[List[Tool]] = None
     tool_choice: Optional[Union[ToolChoice, Dict[str, Any]]] = None
     metadata: Optional[Metadata] = None
     thinking: Optional[ThinkingConfig] = None
-    service_tier: Optional[str] = None
+    service_tier: Optional[str] = None
 
 
 class Usage(BaseModel):
@@ -175,10 +178,8 @@ class AnthropicResponse(BaseModel):
 # ============== Helper Functions ==============
 
 def extract_text_from_content(content: Union[str, List[ContentBlock]]) -> str:
-    """Extract text from content which may be string or list of blocks"""
     if isinstance(content, str):
         return content
-
     texts = []
     for block in content:
         if isinstance(block, str):
@@ -196,7 +197,6 @@ def extract_text_from_content(content: Union[str, List[ContentBlock]]) -> str:
 
 
 def format_system_prompt(system: Optional[Union[str, List[TextBlock]]]) -> str:
-    """Format system prompt from string or list of blocks"""
     if system is None:
         return ""
     if isinstance(system, str):
@@ -205,18 +205,13 @@ def format_system_prompt(system: Optional[Union[str, List[TextBlock]]]) -> str:
 
 
 def format_messages_to_prompt(messages: List[MessageParam], system: Optional[Union[str, List[TextBlock]]] = None, include_thinking: bool = False) -> str:
-    """Convert chat messages to a single prompt string"""
     prompt_parts = []
-
     system_text = format_system_prompt(system)
     if system_text:
         prompt_parts.append(f"System: {system_text}\n\n")
-
     for msg in messages:
         role = msg.role
         content = msg.content
-
-        # Handle interleaved thinking in message history
         if isinstance(content, list):
             for block in content:
                 if isinstance(block, dict):
@@ -242,19 +237,15 @@ def format_messages_to_prompt(messages: List[MessageParam], system: Optional[Uni
             prompt_parts.append(f"Human: {content_text}\n\n")
         elif role == "assistant":
             prompt_parts.append(f"Assistant: {content_text}\n\n")
-
     prompt_parts.append("Assistant:")
     return "".join(prompt_parts)
 
 
 def generate_text(prompt: str, max_tokens: int, temperature: float, top_p: float) -> tuple:
-    """Generate text and return (text, input_tokens, output_tokens)"""
     tokenizer = models["tokenizer"]
     model = models["model"]
-
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
     input_tokens = inputs["input_ids"].shape[1]
-
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
@@ -265,24 +256,18 @@ def generate_text(prompt: str, max_tokens: int, temperature: float, top_p: float
             pad_token_id=tokenizer.pad_token_id,
             eos_token_id=tokenizer.eos_token_id
         )
-
     generated_tokens = outputs[0][input_tokens:]
     output_tokens = len(generated_tokens)
     generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
-
     return generated_text.strip(), input_tokens, output_tokens
 
 
 def generate_thinking(prompt: str, budget_tokens: int = 100) -> tuple:
-    """Generate thinking/reasoning content"""
     tokenizer = models["tokenizer"]
     model = models["model"]
-
     thinking_prompt = f"{prompt}\n\nLet me think through this step by step:\n"
-
     inputs = tokenizer(thinking_prompt, return_tensors="pt", truncation=True, max_length=512)
     input_tokens = inputs["input_ids"].shape[1]
-
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
@@ -293,173 +278,99 @@ def generate_thinking(prompt: str, budget_tokens: int = 100) -> tuple:
             pad_token_id=tokenizer.pad_token_id,
             eos_token_id=tokenizer.eos_token_id
         )
-
     generated_tokens = outputs[0][input_tokens:]
     thinking_tokens = len(generated_tokens)
     thinking_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
-
     return thinking_text.strip(), thinking_tokens
 
 
-async def generate_stream_with_thinking(
-    prompt: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    message_id: str,
-    model_name: str,
-    thinking_enabled: bool = False,
-    thinking_budget: int = 100
-):
-    """Generate streaming response with interleaved thinking in Anthropic SSE format"""
+async def generate_stream_with_thinking(prompt: str, max_tokens: int, temperature: float, top_p: float, message_id: str, model_name: str, thinking_enabled: bool = False, thinking_budget: int = 100):
     tokenizer = models["tokenizer"]
     model = models["model"]
-
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
     input_tokens = inputs["input_ids"].shape[1]
     total_output_tokens = 0
 
-    # Send message_start event
     message_start = {
         "type": "message_start",
-        "message": {
-            "id": message_id,
-            "type": "message",
-            "role": "assistant",
-            "content": [],
-            "model": model_name,
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {"input_tokens": input_tokens, "output_tokens": 0}
-        }
+        "message": {"id": message_id, "type": "message", "role": "assistant", "content": [], "model": model_name, "stop_reason": None, "stop_sequence": None, "usage": {"input_tokens": input_tokens, "output_tokens": 0}}
     }
     yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
 
     content_index = 0
 
-    # Generate thinking block if enabled
     if thinking_enabled:
-
-        thinking_block_start = {
-            "type": "content_block_start",
-            "index": content_index,
-            "content_block": {"type": "thinking", "thinking": ""}
-        }
+        thinking_block_start = {"type": "content_block_start", "index": content_index, "content_block": {"type": "thinking", "thinking": ""}}
        yield f"event: content_block_start\ndata: {json.dumps(thinking_block_start)}\n\n"
-
-        # Generate thinking content
         thinking_text, thinking_tokens = generate_thinking(prompt, thinking_budget)
         total_output_tokens += thinking_tokens
-
-
-
-        for i in range(0, len(thinking_text), chunk_size):
-            chunk = thinking_text[i:i+chunk_size]
-            thinking_delta = {
-                "type": "content_block_delta",
-                "index": content_index,
-                "delta": {"type": "thinking_delta", "thinking": chunk}
-            }
-            yield f"event: content_block_delta\ndata: {json.dumps(thinking_delta)}\n\n"
+        for i in range(0, len(thinking_text), 10):
+            chunk = thinking_text[i:i+10]
+            yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_index, 'delta': {'type': 'thinking_delta', 'thinking': chunk}})}\n\n"
             await asyncio.sleep(0.01)
-
-        # Send thinking content_block_stop
-        thinking_block_stop = {"type": "content_block_stop", "index": content_index}
-        yield f"event: content_block_stop\ndata: {json.dumps(thinking_block_stop)}\n\n"
-
+        yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_index})}\n\n"
         content_index += 1
 
-
-    text_block_start = {
-        "type": "content_block_start",
-        "index": content_index,
-        "content_block": {"type": "text", "text": ""}
-    }
-    yield f"event: content_block_start\ndata: {json.dumps(text_block_start)}\n\n"
+    yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_index, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
 
-    # Generate main response
     with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=min(max_tokens, 256),
-            temperature=temperature if temperature > 0 else 1.0,
-            top_p=top_p,
-            do_sample=temperature > 0,
-            pad_token_id=tokenizer.pad_token_id,
-            eos_token_id=tokenizer.eos_token_id
-        )
+        outputs = model.generate(**inputs, max_new_tokens=min(max_tokens, 256), temperature=temperature if temperature > 0 else 1.0, top_p=top_p, do_sample=temperature > 0, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id)
 
     generated_tokens = outputs[0][input_tokens:]
     generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
     total_output_tokens += len(generated_tokens)
 
-
-
-    for i in range(0, len(generated_text), chunk_size):
-        chunk = generated_text[i:i+chunk_size]
-        text_delta = {
-            "type": "content_block_delta",
-            "index": content_index,
-            "delta": {"type": "text_delta", "text": chunk}
-        }
-        yield f"event: content_block_delta\ndata: {json.dumps(text_delta)}\n\n"
+    for i in range(0, len(generated_text), 5):
+        yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_index, 'delta': {'type': 'text_delta', 'text': generated_text[i:i+5]}})}\n\n"
         await asyncio.sleep(0.01)
 
-
-
-    yield f"event: 
-
-    # Send message_delta event
-    message_delta = {
-        "type": "message_delta",
-        "delta": {"stop_reason": "end_turn", "stop_sequence": None},
-        "usage": {"output_tokens": total_output_tokens}
-    }
-    yield f"event: message_delta\ndata: {json.dumps(message_delta)}\n\n"
-
-    # Send message_stop event
-    message_stop = {"type": "message_stop"}
-    yield f"event: message_stop\ndata: {json.dumps(message_stop)}\n\n"
+    yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_index})}\n\n"
+    yield f"event: message_delta\ndata: {json.dumps({'type': 'message_delta', 'delta': {'stop_reason': 'end_turn', 'stop_sequence': None}, 'usage': {'output_tokens': total_output_tokens}})}\n\n"
+    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
 
 
 def handle_tool_call(tools: List[Tool], messages: List[MessageParam], generated_text: str) -> Optional[ToolUseBlock]:
-    """Check if the response should trigger a tool call"""
     if not tools:
         return None
-
     for tool in tools:
         if tool.name.lower() in generated_text.lower():
-            return ToolUseBlock(
-                type="tool_use",
-                id=f"toolu_{uuid.uuid4().hex[:24]}",
-                name=tool.name,
-                input={}
-            )
+            return ToolUseBlock(type="tool_use", id=f"toolu_{uuid.uuid4().hex[:24]}", name=tool.name, input={})
     return None
 
 
+# ============== Frontend ==============
+
+@app.get("/", response_class=HTMLResponse)
+async def home():
+    """Serve the minimal centered frontend"""
+    try:
+        with open("/app/static/index.html", "r") as f:
+            return HTMLResponse(content=f.read())
+    except:
+        return HTMLResponse(content="""
+        <!DOCTYPE html>
+        <html><head><meta charset="UTF-8"><title>Model Runner</title>
+        <style>*{margin:0;padding:0}body{min-height:100vh;background:#000;display:flex;justify-content:center;align-items:center}
+        .logo{width:200px;height:200px;animation:float 3s ease-in-out infinite}
+        @keyframes float{0%,100%{transform:translateY(0)}50%{transform:translateY(-10px)}}</style></head>
+        <body><div class="logo"><svg viewBox="0 0 200 200" fill="none">
+        <defs><linearGradient id="r" x1="0%" y1="100%" x2="100%" y2="0%">
+        <stop offset="0%" stop-color="#ff0080"/><stop offset="25%" stop-color="#ff4d00"/>
+        <stop offset="50%" stop-color="#ffcc00"/><stop offset="75%" stop-color="#00ff88"/>
+        <stop offset="100%" stop-color="#00ccff"/></linearGradient></defs>
+        <path d="M100 20 L180 160 L20 160 Z" stroke="url(#r)" stroke-width="12" stroke-linecap="round" fill="none"/>
+        <path d="M100 70 L130 130 L70 130 Z" stroke="url(#r)" stroke-width="8" stroke-linecap="round" fill="none"/>
+        <line x1="80" y1="115" x2="120" y2="115" stroke="url(#r)" stroke-width="6" stroke-linecap="round"/>
+        </svg></div></body></html>
+        """)
+
+
 # ============== Anthropic API Endpoints ==============
 
 @app.post("/v1/messages")
 async def create_message(request: AnthropicRequest):
-    """
-    Anthropic Messages API compatible endpoint with Interleaved Thinking
-
-    POST /v1/messages
-
-    Supports:
-    - Text messages
-    - System prompts
-    - Streaming responses
-    - Tool/function calling
-    - Interleaved thinking blocks
-    - Thinking budget tokens
-    - Metadata
-    """
     try:
         message_id = f"msg_{uuid.uuid4().hex[:24]}"
-
-        # Check if thinking is enabled
         thinking_enabled = False
         thinking_budget = 100
         if request.thinking:
@@ -470,50 +381,26 @@ async def create_message(request: AnthropicRequest):
             thinking_enabled = request.thinking.type == 'enabled'
             thinking_budget = request.thinking.budget_tokens or 100
 
-        # Format messages to prompt (include thinking from history if enabled)
         prompt = format_messages_to_prompt(request.messages, request.system, include_thinking=thinking_enabled)
 
-        # Handle streaming
         if request.stream:
             return StreamingResponse(
-                generate_stream_with_thinking(
-                    prompt=prompt,
-                    max_tokens=request.max_tokens,
-                    temperature=request.temperature or 1.0,
-                    top_p=request.top_p or 1.0,
-                    message_id=message_id,
-                    model_name=request.model,
-                    thinking_enabled=thinking_enabled,
-                    thinking_budget=thinking_budget
-                ),
+                generate_stream_with_thinking(prompt, request.max_tokens, request.temperature or 1.0, request.top_p or 1.0, message_id, request.model, thinking_enabled, thinking_budget),
                 media_type="text/event-stream",
-                headers={
-                    "Cache-Control": "no-cache",
-                    "Connection": "keep-alive",
-                    "X-Accel-Buffering": "no"
-                }
+                headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"}
             )
 
-        # Non-streaming response
         content_blocks = []
         total_output_tokens = 0
 
-        # Generate thinking block if enabled
         if thinking_enabled:
             thinking_text, thinking_tokens = generate_thinking(prompt, thinking_budget)
             total_output_tokens += thinking_tokens
             content_blocks.append(ThinkingBlock(type="thinking", thinking=thinking_text))
 
-
-        generated_text, input_tokens, output_tokens = generate_text(
-            prompt=prompt,
-            max_tokens=request.max_tokens,
-            temperature=request.temperature or 1.0,
-            top_p=request.top_p or 1.0
-        )
+        generated_text, input_tokens, output_tokens = generate_text(prompt, request.max_tokens, request.temperature or 1.0, request.top_p or 1.0)
         total_output_tokens += output_tokens
 
-        # Check for tool calls
         tool_use = handle_tool_call(request.tools, request.messages, generated_text) if request.tools else None
 
         if tool_use:
@@ -524,18 +411,12 @@ async def create_message(request: AnthropicRequest):
             content_blocks.append(TextBlock(type="text", text=generated_text))
             stop_reason = "end_turn"
 
-        return AnthropicResponse(
-            id=message_id,
-            content=content_blocks,
-            model=request.model,
-            stop_reason=stop_reason,
-            usage=Usage(input_tokens=input_tokens, output_tokens=total_output_tokens)
-        )
+        return AnthropicResponse(id=message_id, content=content_blocks, model=request.model, stop_reason=stop_reason, usage=Usage(input_tokens=input_tokens, output_tokens=total_output_tokens))
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 
-# ============== OpenAI Compatible
+# ============== OpenAI Compatible ==============
 
 class ChatMessage(BaseModel):
     role: str
@@ -553,115 +434,28 @@ class ChatCompletionRequest(BaseModel):
 
 @app.post("/v1/chat/completions")
 async def chat_completions(request: ChatCompletionRequest):
-    """OpenAI Chat Completions API compatible endpoint"""
     try:
-        anthropic_messages = [
-            MessageParam(role=msg.role if msg.role in ["user", "assistant"] else "user",
-                        content=msg.content)
-            for msg in request.messages
-            if msg.role in ["user", "assistant"]
-        ]
-
+        anthropic_messages = [MessageParam(role=msg.role if msg.role in ["user", "assistant"] else "user", content=msg.content) for msg in request.messages if msg.role in ["user", "assistant"]]
         prompt = format_messages_to_prompt(anthropic_messages)
-        generated_text, input_tokens, output_tokens = generate_text(
-            prompt=prompt,
-            max_tokens=request.max_tokens or 1024,
-            temperature=request.temperature or 0.7,
-            top_p=request.top_p or 1.0
-        )
-
-        return {
-            "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
-            "object": "chat.completion",
-            "created": int(time.time()),
-            "model": request.model,
-            "choices": [{
-                "index": 0,
-                "message": {"role": "assistant", "content": generated_text},
-                "finish_reason": "stop"
-            }],
-            "usage": {
-                "prompt_tokens": input_tokens,
-                "completion_tokens": output_tokens,
-                "total_tokens": input_tokens + output_tokens
-            }
-        }
+        generated_text, input_tokens, output_tokens = generate_text(prompt, request.max_tokens or 1024, request.temperature or 0.7, request.top_p or 1.0)
+        return {"id": f"chatcmpl-{uuid.uuid4().hex[:24]}", "object": "chat.completion", "created": int(time.time()), "model": request.model, "choices": [{"index": 0, "message": {"role": "assistant", "content": generated_text}, "finish_reason": "stop"}], "usage": {"prompt_tokens": input_tokens, "completion_tokens": output_tokens, "total_tokens": input_tokens + output_tokens}}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 
 @app.get("/v1/models")
 async def list_models():
-    """
-    return {
-        "object": "list",
-        "data": [
-            {"id": "MiniMax-M2", "object": "model", "created": int(time.time()), "owned_by": "local"},
-            {"id": "MiniMax-M2-Stable", "object": "model", "created": int(time.time()), "owned_by": "local"},
-            {"id": GENERATOR_MODEL, "object": "model", "created": int(time.time()), "owned_by": "local"}
-        ]
-    }
-
-
-# ============== Utility Endpoints ==============
-
-@app.get("/")
-async def root():
-    """Welcome endpoint"""
-    return {
-        "message": "Docker Model Runner API (Anthropic Compatible + Interleaved Thinking)",
-        "hardware": "CPU Basic: 2 vCPU · 16 GB RAM",
-        "docs": "/docs",
-        "api_endpoints": {
-            "anthropic_messages": "POST /v1/messages",
-            "openai_chat": "POST /v1/chat/completions",
-            "models": "GET /v1/models"
-        },
-        "supported_features": [
-            "text messages",
-            "system prompts",
-            "streaming responses",
-            "tool/function calling",
-            "interleaved thinking blocks",
-            "thinking budget tokens",
-            "metadata"
-        ]
-    }
+    return {"object": "list", "data": [{"id": "MiniMax-M2", "object": "model", "created": int(time.time()), "owned_by": "local"}, {"id": "MiniMax-M2-Stable", "object": "model", "created": int(time.time()), "owned_by": "local"}, {"id": GENERATOR_MODEL, "object": "model", "created": int(time.time()), "owned_by": "local"}]}
 
 
 @app.get("/health")
 async def health():
-    """
-    return {
-        "status": "healthy",
-        "timestamp": datetime.utcnow().isoformat(),
-        "hardware": "CPU Basic: 2 vCPU · 16 GB RAM",
-        "models_loaded": len(models) > 0
-    }
+    return {"status": "healthy", "timestamp": datetime.utcnow().isoformat(), "models_loaded": len(models) > 0}
 
 
 @app.get("/info")
 async def info():
-    """
-    return {
-        "name": "Docker Model Runner",
-        "version": "1.1.0",
-        "api_compatibility": ["anthropic", "openai"],
-        "supported_models": ["MiniMax-M2", "MiniMax-M2-Stable"],
-        "interleaved_thinking": {
-            "supported": True,
-            "streaming": True,
-            "budget_tokens": True
-        },
-        "supported_parameters": {
-            "fully_supported": ["model", "messages", "max_tokens", "stream", "system", "temperature", "top_p", "tools", "tool_choice", "metadata", "thinking"],
-            "ignored": ["top_k", "stop_sequences", "service_tier"]
-        },
-        "message_types": {
-            "supported": ["text", "tool_use", "tool_result", "thinking"],
-            "not_supported": ["image", "document"]
-        }
-    }
+    return {"name": "Model Runner", "version": "1.1.0", "api_compatibility": ["anthropic", "openai"], "interleaved_thinking": True}
 
 
 if __name__ == "__main__":
static/index.html ADDED

@@ -0,0 +1,115 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Model Runner</title>
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            min-height: 100vh;
+            background: #000000;
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            overflow: hidden;
+        }
+
+        .container {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            gap: 2rem;
+        }
+
+        .logo {
+            width: 200px;
+            height: 200px;
+            position: relative;
+            animation: float 3s ease-in-out infinite;
+        }
+
+        .logo svg {
+            width: 100%;
+            height: 100%;
+            filter: drop-shadow(0 0 30px rgba(255, 100, 100, 0.3));
+        }
+
+        .status {
+            display: flex;
+            align-items: center;
+            gap: 0.5rem;
+            color: rgba(255, 255, 255, 0.6);
+            font-size: 0.875rem;
+        }
+
+        .status-dot {
+            width: 8px;
+            height: 8px;
+            background: #22c55e;
+            border-radius: 50%;
+            animation: pulse 2s ease-in-out infinite;
+        }
+
+        .sparkle {
+            position: fixed;
+            bottom: 2rem;
+            right: 2rem;
+            opacity: 0.4;
+        }
+
+        @keyframes float {
+            0%, 100% { transform: translateY(0); }
+            50% { transform: translateY(-10px); }
+        }
+
+        @keyframes pulse {
+            0%, 100% { opacity: 1; transform: scale(1); }
+            50% { opacity: 0.5; transform: scale(1.2); }
+        }
+
+        @keyframes spin {
+            from { transform: rotate(0deg); }
+            to { transform: rotate(360deg); }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="logo">
+            <svg viewBox="0 0 200 200" fill="none" xmlns="http://www.w3.org/2000/svg">
+                <defs>
+                    <linearGradient id="rainbow" x1="0%" y1="100%" x2="100%" y2="0%">
+                        <stop offset="0%" stop-color="#ff0080"/>
+                        <stop offset="20%" stop-color="#ff4d00"/>
+                        <stop offset="40%" stop-color="#ffcc00"/>
+                        <stop offset="60%" stop-color="#00ff88"/>
+                        <stop offset="80%" stop-color="#00ccff"/>
+                        <stop offset="100%" stop-color="#6644ff"/>
+                    </linearGradient>
+                </defs>
+                <!-- Outer triangle -->
+                <path d="M100 20 L180 160 L20 160 Z" stroke="url(#rainbow)" stroke-width="12" stroke-linecap="round" stroke-linejoin="round" fill="none"/>
+                <!-- Inner A shape -->
+                <path d="M100 70 L130 130 L70 130 Z" stroke="url(#rainbow)" stroke-width="8" stroke-linecap="round" stroke-linejoin="round" fill="none"/>
+                <!-- Horizontal bar -->
+                <line x1="80" y1="115" x2="120" y2="115" stroke="url(#rainbow)" stroke-width="6" stroke-linecap="round"/>
+            </svg>
+        </div>
+        <div class="status">
+            <span class="status-dot"></span>
+            <span>Ready</span>
+        </div>
+    </div>
+
+    <svg class="sparkle" width="24" height="24" viewBox="0 0 24 24" fill="none">
+        <path d="M12 2L13.5 8.5L20 10L13.5 11.5L12 18L10.5 11.5L4 10L10.5 8.5L12 2Z" fill="rgba(255,255,255,0.6)"/>
+    </svg>
+</body>
+</html>
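For context, a minimal client sketch (not part of this commit) showing how the streaming /v1/messages endpoint added in main.py can be consumed: it posts a request with stream and thinking enabled and prints the thinking_delta and text_delta chunks from the SSE events. The base URL http://localhost:7860 and the use of the requests library are assumptions, not something this diff defines.

# Example client (illustrative only, not part of the commit).
# Assumes the server from main.py is reachable at http://localhost:7860.
import json
import requests

payload = {
    "model": "MiniMax-M2",
    "max_tokens": 128,
    "stream": True,
    "thinking": {"type": "enabled", "budget_tokens": 100},
    "messages": [{"role": "user", "content": "Why is the sky blue?"}],
}

with requests.post("http://localhost:7860/v1/messages", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        # The server emits SSE frames: "event: <name>" followed by "data: <json>".
        if line and line.startswith("data: "):
            event = json.loads(line[len("data: "):])
            if event.get("type") == "content_block_delta":
                delta = event["delta"]
                # thinking_delta carries the reasoning stream, text_delta the final answer.
                print(delta.get("thinking") or delta.get("text") or "", end="", flush=True)
print()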