from transformers import AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, T5ForConditionalGeneration, pipeline
from langchain import HuggingFacePipeline
import torch

def load_model_and_pipeline(model_info, quantization=4, is_t5=False, use_local=True):
    """Load a tokenizer and model for `model_info` and wrap them in a LangChain LLM.

    quantization: 8 (or "8") loads the model in 8-bit; any other value falls
    back to 4-bit. Ignored for T5 models.
    use_local is accepted for compatibility with the wrappers below but is not
    currently used.
    """
    if is_t5:
        # Seq2seq path: load the T5 checkpoint and its own tokenizer directly,
        # rather than first loading (and discarding) a causal LM.
        tokenizer = T5Tokenizer.from_pretrained(model_info)
        model = T5ForConditionalGeneration.from_pretrained(model_info)
        task = "text2text-generation"
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_info, use_auth_token=True)
        # Accept the quantization flag as an int or a string ("8" vs 8).
        if int(quantization) == 8:
            model = AutoModelForCausalLM.from_pretrained(
                model_info,
                device_map='auto',
                torch_dtype=torch.float16,
                use_auth_token=True,
                load_in_8bit=True
            )
        else:
            model = AutoModelForCausalLM.from_pretrained(
                model_info,
                device_map='auto',
                torch_dtype=torch.float16,
                use_auth_token=True,
                load_in_4bit=True
            )
        task = "text-generation"

    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id
    )

    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
    return tokenizer, model, llm


def zephyr_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)


def llama_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)


def mistral_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)


def phi_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)


def flant5_model(model_info, use_local=True):
    return load_model_and_pipeline(model_info, is_t5=True, use_local=use_local)
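

# Example usage, a minimal sketch: the model id below is an assumption; any
# causal-LM checkpoint you have access to on the Hugging Face Hub works the
# same way with these wrappers.
def example_load_zephyr():
    tokenizer, model, llm = zephyr_model("HuggingFaceH4/zephyr-7b-beta", 4)
    # The returned `llm` is a LangChain LLM wrapping the generation pipeline.
    print(llm("Explain retrieval-augmented generation in one sentence."))
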
import pandas as pd
from datasets import Dataset

def calculate_rag_metrics(model_ques_ans_gen, llm_model, embedding_model="BAAI/bge-base-en-v1.5"):
    """Score generated answers with ragas and return the evaluation result."""
    from ragas import evaluate
    from ragas.metrics import (
        faithfulness, answer_correctness, answer_similarity,
        answer_relevancy, context_recall, context_precision
    )
    from langchain.embeddings import HuggingFaceEmbeddings

    data_samples = {
        'question': [item['question'] for item in model_ques_ans_gen],
        'answer': [item['answer'] for item in model_ques_ans_gen],
        'contexts': [item['contexts'] for item in model_ques_ans_gen],
        'ground_truths': [item['ground_truths'] for item in model_ques_ans_gen]
    }

    rag_df = pd.DataFrame(data_samples)
    rag_eval_dataset = Dataset.from_pandas(rag_df)

    # evaluate() expects the metric objects imported above, not their names
    # as strings.
    metrics = [
        answer_correctness, answer_similarity,
        answer_relevancy, faithfulness,
        context_recall, context_precision
    ]

    # Likewise, embeddings must be an embeddings object; wrap the model name
    # when a plain string is passed.
    if isinstance(embedding_model, str):
        embedding_model = HuggingFaceEmbeddings(model_name=embedding_model)

    result = evaluate(
        rag_eval_dataset,
        metrics=metrics,
        llm=llm_model,
        embeddings=embedding_model
    )
    # Callers can use result.to_pandas() for a DataFrame view of the scores.
    return result
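

# Example usage, a minimal sketch: the record below and the choice of Mistral
# as the judge model are assumptions; any of the loaders above produces a
# compatible LangChain LLM. Each record needs 'question', 'answer', 'contexts'
# (a list of retrieved passages) and 'ground_truths' (a list of reference
# answers), matching the keys read in calculate_rag_metrics.
def example_evaluate():
    samples = [{
        'question': "What does RAG stand for?",
        'answer': "Retrieval-augmented generation.",
        'contexts': ["RAG stands for retrieval-augmented generation."],
        'ground_truths': ["Retrieval-augmented generation."],
    }]
    _, _, llm = mistral_model("mistralai/Mistral-7B-Instruct-v0.2", 4)
    result = calculate_rag_metrics(samples, llm)
    print(result)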