Spaces:

Mat17892
/

iris

Runtime error

desert committed on Nov 29, 2024

Commit

d67d04a

1 Parent(s): 21886ee

del

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,17 +2,22 @@ import gradio as gr
 from unsloth import FastLanguageModel
 import torch
-max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
-dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
 model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "llama_lora_model_1",
-    max_seq_length = max_seq_length,
-    dtype = dtype,
-    load_in_4bit = load_in_4bit,
 )
 # Respond function
 def respond(
     message,
@@ -45,7 +50,7 @@ def respond(
     # Generate the response using your model
     outputs = model.generate(
-        input_ids=inputs["input_ids"],
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,

 from unsloth import FastLanguageModel
 import torch
+max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
+dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
+# Check for GPU availability and use the appropriate device
+device = "cuda" if torch.cuda.is_available() else "cpu"
 model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="llama_lora_model_1",
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    load_in_4bit=load_in_4bit,
 )
+model.to(device)  # Move model to the appropriate device
 # Respond function
 def respond(
     message,
     # Generate the response using your model
     outputs = model.generate(
+        input_ids=inputs["input_ids"].to(device),  # Ensure input is on the correct device
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,