Update app.py

app.py CHANGED
@@ -2,17 +2,21 @@ import os
 import gradio as gr
 import copy
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download
 
 
-
-
-
-
-
-
-
-
+try:
+    llm = Llama(
+        model_path=hf_hub_download(
+            repo_id=os.environ.get("REPO_ID", "microsoft/Phi-3-mini-4k-instruct-gguf"),
+            filename=os.environ.get("MODEL_FILE", "Phi-3-mini-4k-instruct-q4.gguf"),
+        ),
+        n_ctx=2048,
+        n_gpu_layers=-1,  # change n_gpu_layers if you have more or less VRAM
+    )
+
+except Exception as e:
+    print(e)
 
 
 def generate_text(

@@ -26,7 +30,13 @@ def generate_text(
     temp = ""
     input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
     for interaction in history:
-        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
+        input_prompt = (
+            input_prompt
+            + str(interaction[0])
+            + " [/INST] "
+            + str(interaction[1])
+            + " </s><s> [INST] "
+        )
 
     input_prompt = input_prompt + str(message) + " [/INST] "
 

@@ -58,11 +68,11 @@ demo = gr.ChatInterface(
     title="llama-cpp-python on GPU",
     description="Running LLM with https://github.com/abetlen/llama-cpp-python",
     examples=[
-        [
-        [
-        [
-        [
-        [
+        ["How to setup a human base on Mars? Give short answer."],
+        ["Explain theory of relativity to me like I’m 8 years old."],
+        ["What is 9,000 * 9,000?"],
+        ["Write a pun-filled happy birthday message to my friend Alex."],
+        ["Justify why a penguin might make a good king of the jungle."],
     ],
     cache_examples=False,
     retry_btn=None,
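For reference, the new try/except block pairs hf_hub_download, which fetches a single file from the Hub (or reuses the local cache) and returns its path on disk, with the llama-cpp-python Llama constructor. Below is a minimal sketch of the same pattern run outside the Space, ending in a one-shot completion; the prompt and generation parameters there are illustrative extras, not part of app.py.

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download (or reuse from cache) one GGUF file and get its local path.
model_path = hf_hub_download(
    repo_id=os.environ.get("REPO_ID", "microsoft/Phi-3-mini-4k-instruct-gguf"),
    filename=os.environ.get("MODEL_FILE", "Phi-3-mini-4k-instruct-q4.gguf"),
)

llm = Llama(
    model_path=model_path,
    n_ctx=2048,       # shared token budget for prompt + generation
    n_gpu_layers=-1,  # -1 offloads all layers to the GPU; lower it on small VRAM
)

# Illustrative one-shot completion (not part of app.py).
out = llm("Q: What is the capital of France? A:", max_tokens=32, stop=["\n"])
print(out["choices"][0]["text"])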
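The reformatted history loop builds a Llama 2-style [INST]/<<SYS>> chat prompt. Here is a self-contained sketch of the string generate_text now assembles; the system message, history pairs, and user message are made up for illustration, and only the string-building logic mirrors the diff.

# Illustrative values; the loop body matches the updated generate_text.
system_message = "You are a helpful assistant."
history = [("Hi!", "Hello! How can I help?")]
message = "Tell me a joke."

input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
for interaction in history:
    input_prompt = (
        input_prompt
        + str(interaction[0])  # user turn
        + " [/INST] "
        + str(interaction[1])  # assistant turn
        + " </s><s> [INST] "
    )
input_prompt = input_prompt + str(message) + " [/INST] "
print(input_prompt)

Note that this is the Llama 2 chat template, while the new default model (Phi-3-mini-4k-instruct) normally expects its own <|user|>/<|assistant|> tags, so the template may need a matching update for best results.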