import datetime
import json
import os

import gradio as gr
import pandas as pd


def move_to(move, model_ans):
    """Return the ID, question, and answer at row ``move`` of a model-answer sheet."""
    df_temp = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    move = int(move)
    # Check the bounds before indexing; wrap back to the first question
    # once the index runs past the end of the sheet.
    if move >= len(df_temp):
        gr.Info(f"Number of questions: {len(df_temp)}")
        move = 0
    row = df_temp.loc[move]
    return [
        gr.Label(value=str(int(row['id'])), label="ID"),
        gr.Label(value=row['question'], label="Question"),
        gr.Label(value=row['answer'], label="Answer"),
    ]
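
# Example (hypothetical sheet name; assumes model_ans/gpt4_model_ans.xlsx exists
# with 'id', 'question', and 'answer' columns):
#   id_label, ques_label, ans_label = move_to(0, "gpt4_model_ans.xlsx")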


def display_table(path=r"data/demo_table_data.xlsx"):
    """Render the first two rows of an Excel sheet as a horizontally scrollable HTML table."""
    df = pd.read_excel(path)
    preview = df.head(2)
    html_table = preview.to_html(index=False)
    return f"<div style='overflow-x:auto;'>{html_table}</div>"


def current_time():
    """Return the current timestamp formatted for use in file names."""
    current_datetime = datetime.datetime.now()
    return current_datetime.strftime("%Y_%m_%d_%H_%M_%S")
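
# Example: current_time() -> e.g. "2024_01_31_13_45_07"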


def random_ques_ans2():
    """Pick a random question from the existing dataset; the answer starts blank."""
    import random
    df = pd.read_excel(r"data/existing_dataset.xlsx")
    # randint is inclusive on both ends, so the upper bound is len(df) - 1.
    idx = random.randint(0, len(df) - 1)
    ques_temp = df.loc[idx]['question']
    ans_temp = ""
    return ques_temp, ans_temp
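
# Example (assumes data/existing_dataset.xlsx has a 'question' column):
#   question, empty_answer = random_ques_ans2()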


def score_report_bar():
    """Average the ratings in each score_report sheet and return one row per model."""
    path = "score_report"
    dat = []
    for fname in os.listdir(path):
        # Build a readable model name from the file name: keep lowercase
        # letters and turn every other character into a space.
        wh = []
        flag = 0
        for ch in fname:
            if 'a' <= ch <= 'z':
                flag = 1
                wh.append(ch)
            elif flag == 1:
                wh.append(" ")
        wh = ''.join(wh)
        # Drop the boilerplate tokens that every report file name carries.
        wh = wh.replace("model ans", "")
        wh = wh.replace("finetuned", "")
        wh = wh.replace("  ", " ")
        wh = wh.replace("xlsx", "")
        df_temp = pd.read_excel(os.path.join(path, fname))
        rating = df_temp["rating"].mean()
        dat.append({
            "Model Name": wh,
            "Average Rating": rating,
        })
    return pd.DataFrame(dat)
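
# Example (assumes score_report/ holds .xlsx files with a 'rating' column):
#   ratings_df = score_report_bar()
#   # suitable for e.g. gr.BarPlot(x="Model Name", y="Average Rating")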


def parse_data(link, progress):
    """Crawl ``link`` within a short time budget, scrape each discovered page,
    and save the combined text under rag_data/ as a .docx file."""
    import re
    import time
    import requests
    from bs4 import BeautifulSoup
    from docx import Document
    from langchain_community.document_loaders import WebBaseLoader

    s = set()
    start_time = time.time()
    duration = 5  # seconds allotted to link discovery

    def get_links(url):
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'lxml')
        links = []
        for a in soup.find_all('a'):
            link_url = a.get('href')
            if link_url is not None and link_url.startswith('http'):
                s.add(link_url)
                links.append(link_url)
        return links

    def get_all_links(url):
        # Stop before issuing another request once the time budget is spent.
        if (time.time() - start_time) >= duration:
            return
        for found in get_links(url):
            if (time.time() - start_time) >= duration:
                return
            get_all_links(found)

    def data_ret2(url):
        loader = WebBaseLoader(url)
        data = loader.load()
        return data[0].page_content

    s.add(link)
    get_all_links(link)
    li = list(s)
    all_data = []
    for x in progress.tqdm(li):
        try:
            print("Link: ", x)
            all_data.append(data_ret2(x))
        except Exception:
            print("pass")
            continue
    all_data2 = re.sub(r'\n+', '\n\n', "\n".join(all_data))
    # Strip characters that are not valid in XML (python-docx rejects them).
    all_data2 = re.sub(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+', '', all_data2)
    document = Document()
    document.add_paragraph(all_data2)
    # URLs contain characters that are illegal in file names; sanitize first.
    safe_name = re.sub(r'[^A-Za-z0-9._-]+', '_', link)
    document.save(f'rag_data/{safe_name}.docx')
    print("Finished!!")
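
# Example (inside a Gradio event handler; link discovery is capped at ~5 seconds
# and the scraped text lands in rag_data/ as a .docx file):
#   parse_data("https://example.com", gr.Progress())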


def all_contri_ans(id, ques):
    """Collect every contributed answer to ``ques`` across the saved spreadsheets.

    ``id`` is accepted to match the Gradio event signature but is unused here.
    """
    folder_path = 'save_ques_ans'
    data_frames = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".xlsx") or filename.endswith(".xls"):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_excel(file_path)
            data_frames.append(df)

    df_hum = pd.concat(data_frames, ignore_index=True)
    temp = []
    for x, y in zip(df_hum['question'], df_hum['answer']):
        if x == ques:
            temp.append(y)
    if len(temp) == 0:
        temp = ["This question's answer is not available."]
    return temp
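
# Example (assumes save_ques_ans/ holds contributor .xlsx files with
# 'question' and 'answer' columns; the question text is hypothetical):
#   answers = all_contri_ans(3, "What is RAG?")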


def save_params_to_file(model_name, embedding_name, splitter_type_dropdown, chunk_size_slider,
                        chunk_overlap_slider, separator_textbox, max_tokens_slider, filename="params.txt"):
    """Persist the RAG configuration both locally and into the deploy folder."""
    params = {
        "model_name": model_name,
        "embedding_name": embedding_name,
        "splitter_type_dropdown": splitter_type_dropdown,
        "chunk_size_slider": chunk_size_slider,
        "chunk_overlap_slider": chunk_overlap_slider,
        "separator_textbox": separator_textbox,
        "max_tokens_slider": max_tokens_slider,
    }

    with open(filename, 'w') as f:
        json.dump(params, f)
    # Keep a second copy next to the deployment code.
    with open(os.path.join("deploy", "params.txt"), 'w') as f:
        json.dump(params, f)


def load_params_from_file(filename="params.txt"):
    """Load the saved RAG configuration; return None if the file is missing."""
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            params = json.load(f)
        return params
    else:
        return None
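
# Example round trip (writes params.txt and deploy/params.txt, then reads it
# back; the argument values are hypothetical):
#   save_params_to_file("llama-3", "bge-small", "recursive", 512, 64, "\n", 256)
#   params = load_params_from_file()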