import datetime
import json
import os

import gradio as gr
import pandas as pd


def move_to(move, model_ans):
    """Return the ID, question, and answer at row ``move`` of a model-answer sheet."""
    df_temp = pd.read_excel(os.path.join("model_ans", str(model_ans)))
    move = int(move)
    # Check the bounds before indexing; wrap back to the first question
    # once the index runs past the end of the sheet.
    if move >= len(df_temp):
        gr.Info(f"Number of questions: {len(df_temp)}")
        move = 0
    row = df_temp.loc[move]
    return [
        gr.Label(value=str(int(row['id'])), label="ID"),
        gr.Label(value=row['question'], label="Question"),
        gr.Label(value=row['answer'], label="Answer"),
    ]
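
# Example (hypothetical sheet name; assumes model_ans/gpt4_model_ans.xlsx exists
# with 'id', 'question', and 'answer' columns):
#   id_label, ques_label, ans_label = move_to(0, "gpt4_model_ans.xlsx")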


def display_table(path=r"data/demo_table_data.xlsx"):
    """Render the first two rows of an Excel sheet as a horizontally scrollable HTML table."""
    df = pd.read_excel(path)
    preview = df.head(2)
    html_table = preview.to_html(index=False)
    return f"<div style='overflow-x:auto;'>{html_table}</div>"


def current_time():
    """Return the current timestamp formatted for use in file names."""
    current_datetime = datetime.datetime.now()
    return current_datetime.strftime("%Y_%m_%d_%H_%M_%S")
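
# Example: current_time() -> e.g. "2024_01_31_13_45_07"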


def random_ques_ans2():
    """Pick a random question from the existing dataset; the answer starts blank."""
    import random
    df = pd.read_excel(r"data/existing_dataset.xlsx")
    # randint is inclusive on both ends, so the upper bound is len(df) - 1.
    idx = random.randint(0, len(df) - 1)
    ques_temp = df.loc[idx]['question']
    ans_temp = ""
    return ques_temp, ans_temp
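
# Example (assumes data/existing_dataset.xlsx has a 'question' column):
#   question, empty_answer = random_ques_ans2()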


def score_report_bar():
    """Average the ratings in each score_report sheet and return one row per model."""
    path = "score_report"
    dat = []
    for fname in os.listdir(path):
        # Build a readable model name from the file name: keep lowercase
        # letters and turn every other character into a space.
        wh = []
        flag = 0
        for ch in fname:
            if 'a' <= ch <= 'z':
                flag = 1
                wh.append(ch)
            elif flag == 1:
                wh.append(" ")
        wh = ''.join(wh)
        # Drop the boilerplate tokens that every report file name carries.
        wh = wh.replace("model ans", "")
        wh = wh.replace("finetuned", "")
        wh = wh.replace("  ", " ")
        wh = wh.replace("xlsx", "")
        df_temp = pd.read_excel(os.path.join(path, fname))
        rating = df_temp["rating"].mean()
        dat.append({
            "Model Name": wh,
            "Average Rating": rating,
        })
    return pd.DataFrame(dat)
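
# Example (assumes score_report/ holds .xlsx files with a 'rating' column):
#   ratings_df = score_report_bar()
#   # suitable for e.g. gr.BarPlot(x="Model Name", y="Average Rating")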


def parse_data(link, progress):
    """Crawl ``link`` within a short time budget, scrape each discovered page,
    and save the combined text under rag_data/ as a .docx file."""
    import re
    import time
    import requests
    from bs4 import BeautifulSoup
    from docx import Document
    from langchain_community.document_loaders import WebBaseLoader

    s = set()
    start_time = time.time()
    duration = 5  # seconds allotted to link discovery

    def get_links(url):
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'lxml')
        links = []
        for a in soup.find_all('a'):
            link_url = a.get('href')
            if link_url is not None and link_url.startswith('http'):
                s.add(link_url)
                links.append(link_url)
        return links

    def get_all_links(url):
        # Stop before issuing another request once the time budget is spent.
        if (time.time() - start_time) >= duration:
            return
        for found in get_links(url):
            if (time.time() - start_time) >= duration:
                return
            get_all_links(found)

    def data_ret2(url):
        loader = WebBaseLoader(url)
        data = loader.load()
        return data[0].page_content

    s.add(link)
    get_all_links(link)
    li = list(s)
    all_data = []
    for x in progress.tqdm(li):
        try:
            print("Link: ", x)
            all_data.append(data_ret2(x))
        except Exception:
            print("pass")
            continue
    all_data2 = re.sub(r'\n+', '\n\n', "\n".join(all_data))
    # Strip characters that are not valid in XML (python-docx rejects them).
    all_data2 = re.sub(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+', '', all_data2)
    document = Document()
    document.add_paragraph(all_data2)
    # URLs contain characters that are illegal in file names; sanitize first.
    safe_name = re.sub(r'[^A-Za-z0-9._-]+', '_', link)
    document.save(f'rag_data/{safe_name}.docx')
    print("Finished!!")
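
# Example (inside a Gradio event handler; link discovery is capped at ~5 seconds
# and the scraped text lands in rag_data/ as a .docx file):
#   parse_data("https://example.com", gr.Progress())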


def all_contri_ans(id, ques):
    """Collect every contributed answer to ``ques`` across the saved spreadsheets.

    ``id`` is accepted to match the Gradio event signature but is unused here.
    """
    folder_path = 'save_ques_ans'
    data_frames = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".xlsx") or filename.endswith(".xls"):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_excel(file_path)
            data_frames.append(df)

    df_hum = pd.concat(data_frames, ignore_index=True)
    temp = []
    for x, y in zip(df_hum['question'], df_hum['answer']):
        if x == ques:
            temp.append(y)
    if len(temp) == 0:
        temp = ["This question's answer is not available."]
    return temp
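
# Example (assumes save_ques_ans/ holds contributor .xlsx files with
# 'question' and 'answer' columns; the question text is hypothetical):
#   answers = all_contri_ans(3, "What is RAG?")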


def save_params_to_file(model_name, embedding_name, splitter_type_dropdown, chunk_size_slider,
                        chunk_overlap_slider, separator_textbox, max_tokens_slider, filename="params.txt"):
    """Persist the RAG configuration both locally and into the deploy folder."""
    params = {
        "model_name": model_name,
        "embedding_name": embedding_name,
        "splitter_type_dropdown": splitter_type_dropdown,
        "chunk_size_slider": chunk_size_slider,
        "chunk_overlap_slider": chunk_overlap_slider,
        "separator_textbox": separator_textbox,
        "max_tokens_slider": max_tokens_slider,
    }

    with open(filename, 'w') as f:
        json.dump(params, f)
    # Keep a second copy next to the deployment code.
    with open(os.path.join("deploy", "params.txt"), 'w') as f:
        json.dump(params, f)


def load_params_from_file(filename="params.txt"):
    """Load the saved RAG configuration; return None if the file is missing."""
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            params = json.load(f)
        return params
    else:
        return None
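
# Example round trip (writes params.txt and deploy/params.txt, then reads it
# back; the argument values are hypothetical):
#   save_params_to_file("llama-3", "bge-small", "recursive", 512, 64, "\n", 256)
#   params = load_params_from_file()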