import gradio as gr

# Load the hosted Meta-Llama-3-8B model from the Hugging Face Hub (served via
# the Inference API) and launch a default Gradio demo for it.
gr.load("models/meta-llama/Meta-Llama-3-8B").launch()
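
# A minimal sketch of the same call with authentication, assuming the access
# token is exposed as the environment variable HF_TOKEN (the gated Llama-3
# weights require an accepted license and a valid token):
#
# import os
# gr.load("models/meta-llama/Meta-Llama-3-8B", hf_token=os.environ["HF_TOKEN"]).launch()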
# import transformers
# import torch
# import os
# import streamlit as st  # needed for st.secrets below
# os.environ["HF_TOKEN"] = st.secrets["HF_TOKEN"]
# os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
# # os.environ["USE_FLASH_ATTENTION"] = "1"
# print(f"Device name: {torch.cuda.get_device_properties('cuda').name}")
# print(f"FlashAttention available: {torch.backends.cuda.flash_sdp_enabled()}")
# print(f"torch version: {torch.__version__}")
# # Earlier attempt with the base (non-instruct) model:
# # model_id = "meta-llama/Meta-Llama-3-8B"
# # pipeline = transformers.pipeline(
# #     "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto"
# # )
# # pipeline("Hey how are you doing today?")
# model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# pipeline = transformers.pipeline(
# "text-generation",
# model=model_id,
# model_kwargs={"torch_dtype": torch.bfloat16},
# device_map="auto",
# )
# messages = [
# {
# "role": "system",
# "content": "You are a pirate chatbot who always responds in pirate speak!",
# },
# {"role": "user", "content": "Who are you?"},
# ]
# prompt = pipeline.tokenizer.apply_chat_template(
# messages, tokenize=False, add_generation_prompt=True
# )
# terminators = [
# pipeline.tokenizer.eos_token_id,
# pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
# ]
# outputs = pipeline(
# prompt,
# max_new_tokens=256,
# eos_token_id=terminators,
# do_sample=True,
# temperature=0.6,
# top_p=0.9,
# )
# print(outputs[0]["generated_text"][len(prompt) :])
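#
# A minimal sketch of serving the local pipeline through a Gradio chat UI
# instead of gr.load(); chat_fn and its single-turn wiring are illustrative
# additions, not part of the original experiment:
#
# def chat_fn(message, history):
#     msgs = [{"role": "user", "content": message}]
#     p = pipeline.tokenizer.apply_chat_template(
#         msgs, tokenize=False, add_generation_prompt=True
#     )
#     out = pipeline(p, max_new_tokens=256, eos_token_id=terminators)
#     return out[0]["generated_text"][len(p):]
#
# gr.ChatInterface(chat_fn).launch()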
# print("hello")