import gradio as gr
import onnxruntime as ort
import numpy as np
import cv2
from huggingface_hub import hf_hub_download  # <-- IMPORT THE DOWNLOADER

# --- 1. GLOBAL SETUP: DOWNLOAD AND LOAD MODELS AT STARTUP ---
# This is the recommended way to use models in a Space.
try:
    print("Downloading and loading ONNX models from the Hub...")

    # Define your model repository ID
    MODEL_REPO = "rtr46/meiki.text.detect.v0"

    # hf_hub_download fetches the file (or reuses the local cache) and
    # returns the path to the cached copy.
    tiny_model_path = hf_hub_download(repo_id=MODEL_REPO, filename="meiki.text.detect.tiny.v0.onnx")
    small_model_path = hf_hub_download(repo_id=MODEL_REPO, filename="meiki.text.detect.small.v0.onnx")

    # Use CPUExecutionProvider for broad compatibility
    providers = ['CPUExecutionProvider']
    ort_session_tiny = ort.InferenceSession(tiny_model_path, providers=providers)
    ort_session_small = ort.InferenceSession(small_model_path, providers=providers)
    print("Models loaded successfully.")
except Exception as e:
    print(f"Error loading models: {e}")
    # If models fail to load, the app will not work; detect_text checks for
    # these None sentinels and surfaces a user-facing error.
    ort_session_tiny = None
    ort_session_small = None


# --- 2. HELPER FUNCTION: PREPROCESSING ---
def resize_and_pad(image: np.ndarray, size: int, is_color: bool):
    """Letterbox *image* into a ``size`` x ``size`` square.

    The image is scaled with its aspect ratio preserved so it fits inside
    the square, then centered on a black canvas. Works for both grayscale
    (H, W) and color (H, W, 3) inputs.

    Returns:
        tuple: (padded_image, ratio, pad_w, pad_h) — the scale factor and
        padding offsets let callers map detections back to the original
        image's coordinate space.
    """
    h, w = image.shape[:2]

    scale = min(size / w, size / h)
    new_w, new_h = int(w * scale), int(h * scale)
    scaled = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    # Black canvas matching the model's expected channel layout.
    canvas_shape = (size, size, 3) if is_color else (size, size)
    canvas = np.zeros(canvas_shape, dtype=np.uint8)

    # Center the scaled image on the canvas.
    off_x, off_y = (size - new_w) // 2, (size - new_h) // 2
    canvas[off_y:off_y + new_h, off_x:off_x + new_w] = scaled

    return canvas, scale, off_x, off_y
# --- 3. CORE INFERENCE FUNCTION ---
def detect_text(model_name, input_image, confidence_threshold):
    """
    Performs text detection on the input image using the selected model.

    Args:
        model_name: "tiny" (320px grayscale input) or "small" (640px color input).
        input_image: numpy array from the Gradio Image component, or None
            if the user has not uploaded anything.
        confidence_threshold: detections scoring below this are discarded.

    Returns:
        A copy of the input image with the surviving boxes drawn on it.

    Raises:
        gr.Error: if the models failed to load at startup, or if no image
            was provided.
    """
    if ort_session_tiny is None or ort_session_small is None:
        raise gr.Error("Models are not loaded. Please check the console logs for errors.")

    # FIX: Gradio passes None when the button is clicked with no image
    # uploaded; the original code crashed with AttributeError on .copy().
    if input_image is None:
        raise gr.Error("Please upload an image first.")

    if model_name == "tiny":
        session = ort_session_tiny
        model_size = 320
        is_color = False
    else:  # "small"
        session = ort_session_small
        model_size = 640
        is_color = True

    output_image = input_image.copy()

    if is_color:
        image_for_model = input_image
    else:
        # NOTE(review): Gradio delivers RGB arrays, while COLOR_BGR2GRAY
        # assumes BGR order, so the R/B luminance weights are swapped here.
        # Kept as-is to preserve existing behavior — confirm against the
        # model's training preprocessing before changing.
        image_for_model = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY)

    padded_image, ratio, pad_w, pad_h = resize_and_pad(image_for_model, model_size, is_color)

    # Normalize to [0, 1]; add batch (and, for grayscale, channel) dims.
    img_normalized = padded_image.astype(np.float32) / 255.0
    if is_color:
        img_transposed = np.transpose(img_normalized, (2, 0, 1))  # HWC -> CHW
        input_tensor = np.expand_dims(img_transposed, axis=0)
    else:
        input_tensor = np.expand_dims(np.expand_dims(img_normalized, axis=0), axis=0)

    sizes_tensor = np.array([[model_size, model_size]], dtype=np.int64)

    # Bind inputs by position so the code works with either model's naming.
    input_names = [inp.name for inp in session.get_inputs()]
    inputs = {input_names[0]: input_tensor, input_names[1]: sizes_tensor}
    outputs = session.run(None, inputs)

    if model_name == "tiny":
        # Tiny model emits boxes only; treat every box as fully confident.
        boxes = outputs[0]
        scores = [1.0] * len(boxes)
    else:
        _, boxes, scores = outputs
        boxes, scores = boxes[0], scores[0]  # strip the batch dimension

    box_count = 0
    for box, score in zip(boxes, scores):
        if score < confidence_threshold:
            continue
        box_count += 1
        x_min, y_min, x_max, y_max = box
        # Undo the letterbox padding/scaling to recover coordinates in the
        # original image's space.
        final_x_min = int((x_min - pad_w) / ratio)
        final_y_min = int((y_min - pad_h) / ratio)
        final_x_max = int((x_max - pad_w) / ratio)
        final_y_max = int((y_max - pad_h) / ratio)
        color = (0, 255, 0) if model_name == "small" else (0, 0, 255)
        cv2.rectangle(output_image, (final_x_min, final_y_min), (final_x_max, final_y_max), color, 2)

    print(f"Processed with '{model_name}' model. Found {box_count} boxes with confidence > {confidence_threshold}.")
    return output_image


# --- 4. GRADIO INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# meiki text detect v0")
    gr.Markdown(
        "upload an image and choose a model to detect horizontal and vertical text lines. "
        "the **small** model is more accurate, especially for images with many text lines like manga, while the **tiny** model is much faster."
    )
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="upload image")
            model_name = gr.Radio(
                ["tiny", "small"], label="choose model", value="small"
            )
            confidence_threshold = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.4, step=0.1, label="confidence threshold"
            )
            detect_button = gr.Button("detect text", variant="primary")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="result")

    detect_button.click(
        fn=detect_text,
        inputs=[model_name, input_image, confidence_threshold],
        outputs=output_image
    )

# --- 5. LAUNCH THE APP ---
demo.launch()