eliujl committed · Commit 32944e5 · 1 Parent(s): 7f1213b

Improved the support for using local model

Allow a local model to be downloaded if it does not already exist locally; otherwise, use the existing local model. A condensed sketch of this pattern follows the changed-file list below.

Files changed:
- app.py +45 -14
- requirements.txt +1 -0
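In essence, use_local_llm() now resolves the GGUF file on disk and calls hf_hub_download only when the file is missing. Below is a minimal sketch of that pattern, assuming the huggingface-hub package added to requirements.txt; the helper name resolve_local_model and its arguments are illustrative, not part of the commit.

import os
from huggingface_hub import hf_hub_download

def resolve_local_model(local_llm_path, repo_id, filename):
    # Illustrative helper (not in app.py): reuse the model file if it is already
    # under local_llm_path/repo_id/, otherwise download it from the Hub.
    model_path = os.path.normpath(os.path.join(local_llm_path, repo_id, filename))
    if not os.path.exists(model_path):
        model_path = hf_hub_download(repo_id=repo_id, filename=filename,
                                     repo_type="model",
                                     local_dir=local_llm_path,
                                     local_dir_use_symlinks=False)
    return model_path

# Example with one entry from the commit's model table:
# resolve_local_model('./models/', "TheBloke/OpenHermes-2-Mistral-7B-GGUF",
#                     "openhermes-2-mistral-7b.Q8_0.gguf")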
app.py CHANGED

@@ -121,9 +121,7 @@ def setup_docsearch(use_pinecone, pinecone_index_name, embeddings, chroma_collec
     index_info = index_client.describe_index_stats()
     # namespace_name = ''
     # if index_info is not None:
-    # print(index_info)
     # print(index_info['namespaces'][namespace_name]['vector_count'])
-    # print(index_info['total_vector_count'])
     # else:
     # print("Index information is not available.")
     # n_texts = index_info['namespaces'][namespace_name]['vector_count']
@@ -146,18 +144,38 @@ def get_response(query, chat_history, CRqa):
     result = CRqa({"question": query, "chat_history": chat_history})
     return result['answer'], result['source_documents']

+
 @st.cache_resource()
-def use_local_llm(r_llm):
+def use_local_llm(r_llm, local_llm_path):
     from langchain.llms import LlamaCpp
     from langchain.callbacks.manager import CallbackManager
     from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+    from huggingface_hub import hf_hub_download
+    model_tuples = [
+        ("TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
+        ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
+        ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
+        ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+        ("TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
+    ]
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     if r_llm == gpt_local_mistral:
-
+        model_name, model_file, model_type, model_link = model_tuples[0]
+    else:
+        model_name, model_file, model_type, model_link = model_tuples[3]
+    model_path = os.path.join( local_llm_path, model_name, model_file )
+    model_path = os.path.normpath( model_path )
+    if not os.path.exists(model_path):
+        print("model not existing at ", model_path, "\n")
+        model_path = hf_hub_download(repo_id=model_name, filename=model_file, repo_type="model",
+                                     #cache_dir=local_llm_path,
+                                     local_dir=local_llm_path, local_dir_use_symlinks=False)
+        print("\n model downloaded at path=",model_path)
     else:
-
+        print("model existing at ", model_path)
+
     llm = LlamaCpp(
-        model_path=
+        model_path=model_path,
         temperature=0.0,
         n_batch=300,
         n_ctx=4000,
@@ -196,7 +214,7 @@ def setup_prompt():
     )
     return prompt

-def setup_em_llm(OPENAI_API_KEY, temperature, r_llm):
+def setup_em_llm(OPENAI_API_KEY, temperature, r_llm, local_llm_path):
     if (r_llm == gpt3p5 or r_llm == gpt4) and OPENAI_API_KEY:
         # Set up OpenAI embeddings
         embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
@@ -204,11 +222,14 @@ def setup_em_llm(OPENAI_API_KEY, temperature, r_llm):
         # Set the temperature to be 0 if you do not want it to make up things
         llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
                          openai_api_key=OPENAI_API_KEY)
-
+    elif r_llm == gpt_local_mistral or r_llm == gpt_local_llama:
         #em_model_name = 'hkunlp/instructor-xl'
         em_model_name='sentence-transformers/all-mpnet-base-v2'
         embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
-        llm = use_local_llm(r_llm)
+        llm = use_local_llm(r_llm, local_llm_path)
+    else:
+        embeddings = []
+        llm = []
     return embeddings, llm


@@ -236,11 +257,13 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     reply = ''
     source = ''
     LLMs = [gpt3p5, gpt4, gpt_local_llama, gpt_local_mistral]
+    local_llm_path = './models/'
+    user_llm_path = ''
     # Get user input of whether to use Pinecone or not
     col1, col2, col3 = st.columns([1, 1, 1])
     # create the radio buttons and text input fields
     with col1:
-        r_llm = st.multiselect('LLM:', LLMs, gpt3p5)
+        r_llm = st.multiselect(label='LLM:', options=LLMs, default=gpt3p5, max_selections=1)
         if not r_llm:
             r_llm = gpt3p5
         else:
@@ -268,7 +291,6 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
             st.write('Local GPT model (and local embedding model) is selected. Online vector store is selected.')
         else:
             st.write('Local GPT model (and local embedding model) and local vector store are selected. All info remains local.')
-    embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm)
     with col3:
         if use_pinecone == True:
             PINECONE_API_KEY = st.text_input(
@@ -282,8 +304,17 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
         chroma_collection_name = st.text_input(
             '''Chroma collection name of 3-63 characters:''')
         persist_directory = "./vectorstore"
-
-
+        if use_openai == False:
+            user_llm_path = st.text_input(
+                "Path for local model (TO BE DOWNLOADED IF NOT EXISTING), type 'default' to use default path:",
+                placeholder="default")
+            if 'default' in user_llm_path:
+                user_llm_path = local_llm_path
+
+    if ( (pinecone_index_name or chroma_collection_name)
+         and ( (use_openai and OPENAI_API_KEY) or (not use_openai and user_llm_path) ) ):
+        embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm, user_llm_path)
+    #if ( pinecone_index_name or chroma_collection_name ) and embeddings and llm:
     session_name = pinecone_index_name + chroma_collection_name
     if r_ingest.lower() == 'yes':
         files = st.file_uploader(
@@ -367,7 +398,7 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
         all_chat_history_str = '\n'.join(
             [f'{x[0]}: {x[1]}' for x in all_chats])
         st.title(':blue[All chat records]')
-        st.text_area('', value=all_chat_history_str, height=250, label_visibility='collapsed')
+        st.text_area('Chat records in ascending order:', value=all_chat_history_str, height=250, label_visibility='collapsed')
 if __name__ == '__main__':
     main(pinecone_index_name, chroma_collection_name, persist_directory,
          docsearch_ready, directory_name)
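Once the GGUF file is on disk, use_local_llm() hands the path to LlamaCpp with the settings visible in the hunk above (temperature 0.0, n_batch 300, n_ctx 4000). The following is a self-contained sketch of that instantiation; the literal model path is illustrative, and passing callback_manager is an assumption, since the tail of the LlamaCpp call sits outside the hunk.

from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Deterministic sampling, 4000-token context, tokens streamed to stdout.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path="./models/TheBloke/Llama-2-13B-chat-GGUF/llama-2-13b-chat.Q4_K_M.gguf",  # illustrative path
    temperature=0.0,
    n_batch=300,
    n_ctx=4000,
    callback_manager=callback_manager,  # assumption: mirrors the callback setup in use_local_llm()
)
print(llm("Q: What file format does llama.cpp load? A:"))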
requirements.txt CHANGED

@@ -11,4 +11,5 @@ pymupdf
 tabulate
 sentence-transformers
 llama-cpp-python
+huggingface-hub
 altair<5
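The only dependency change is the new huggingface-hub entry, which provides the hf_hub_download import used in app.py. A quick sanity check after pip install -r requirements.txt:

# Confirms the new dependency is importable and exposes the download helper.
from huggingface_hub import hf_hub_download
print(hf_hub_download.__name__)  # -> hf_hub_download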