Spaces:
Runtime error
Runtime error
| import functools | |
| import gradio as gr | |
| from modules import shared | |
# Maps each loader name to the names of the UI parameter elements that are
# shown when that loader is selected (see make_loader_params_visible).
#
# 'gpu_memory' is a placeholder: consumers replace it with every key of
# shared.gradio that starts with 'gpu_memory'.
#
# Each name appears at most once per loader; the lists are only used for
# membership checks, so a repeated name carries no meaning.
loaders_and_params = {
    'AutoGPTQ': [
        'triton',
        'no_inject_fused_attention',
        'no_inject_fused_mlp',
        'no_use_cuda_fp16',
        'wbits',
        'groupsize',
        'desc_act',
        'gpu_memory',
        'cpu_memory',
        'cpu',
        'disk',
        'auto_devices',
        'trust_remote_code',
        'autogptq_info',
    ],
    'GPTQ-for-LLaMa': [
        'wbits',
        'groupsize',
        'model_type',
        'pre_layer',
        'gptq_for_llama_info',
    ],
    'llama.cpp': [
        'n_ctx',
        'n_gqa',
        'rms_norm_eps',
        'n_gpu_layers',
        'n_batch',
        'threads',
        'no_mmap',
        'low_vram',
        'mlock',
        'llama_cpp_seed',
        'compress_pos_emb',
        'alpha_value',
    ],
    'llamacpp_HF': [
        'n_ctx',
        'n_gqa',
        'rms_norm_eps',
        'n_gpu_layers',
        'n_batch',
        'threads',
        'no_mmap',
        'low_vram',
        'mlock',
        'llama_cpp_seed',
        'compress_pos_emb',
        'alpha_value',
        'llamacpp_HF_info',
    ],
    'Transformers': [
        'cpu_memory',
        'gpu_memory',
        'trust_remote_code',
        'load_in_8bit',
        'bf16',
        'cpu',
        'disk',
        'auto_devices',
        'load_in_4bit',
        'use_double_quant',
        'quant_type',
        'compute_dtype',
        'transformers_info',
    ],
    'ExLlama': [
        'gpu_split',
        'max_seq_len',
        'compress_pos_emb',
        'alpha_value',
        'exllama_info',
    ],
    'ExLlama_HF': [
        'gpu_split',
        'max_seq_len',
        'compress_pos_emb',
        'alpha_value',
        'exllama_HF_info',
    ],
}
# Sampler names listed for ExLlama_HF and llamacpp_HF; the Transformers,
# AutoGPTQ and GPTQ-for-LLaMa entries contain all of these as well.
_SHARED_SAMPLERS = (
    'temperature',
    'top_p',
    'top_k',
    'typical_p',
    'epsilon_cutoff',
    'eta_cutoff',
    'tfs',
    'top_a',
    'repetition_penalty',
    'repetition_penalty_range',
    'encoder_repetition_penalty',
    'no_repeat_ngram_size',
    'min_length',
    'seed',
    'do_sample',
    'mirostat_mode',
    'mirostat_tau',
    'mirostat_eta',
    'ban_eos_token',
    'add_bos_token',
    'skip_special_tokens',
)

# Additional names that only the Transformers, AutoGPTQ and GPTQ-for-LLaMa
# entries carry on top of the shared ones.
_EXTRA_SAMPLERS = (
    'penalty_alpha',
    'num_beams',
    'length_penalty',
    'early_stopping',
)

# Maps each loader name to the set of sampler element names that stay visible
# for it (see blacklist_samplers). Every value is a freshly built set, so no
# two loaders share the same set object.
loaders_samplers = {
    'Transformers': {*_SHARED_SAMPLERS, *_EXTRA_SAMPLERS},
    'ExLlama_HF': set(_SHARED_SAMPLERS),
    'ExLlama': {
        'temperature',
        'top_p',
        'top_k',
        'typical_p',
        'repetition_penalty',
        'repetition_penalty_range',
        'seed',
        'ban_eos_token',
    },
    'AutoGPTQ': {*_SHARED_SAMPLERS, *_EXTRA_SAMPLERS},
    'GPTQ-for-LLaMa': {*_SHARED_SAMPLERS, *_EXTRA_SAMPLERS},
    'llama.cpp': {
        'temperature',
        'top_p',
        'top_k',
        'tfs',
        'repetition_penalty',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
        'ban_eos_token',
    },
    'llamacpp_HF': set(_SHARED_SAMPLERS),
}
def list_all_samplers():
    """Return every sampler name found in ``loaders_samplers``, sorted.

    Names that appear under several loaders are reported once.
    """
    merged = set().union(*loaders_samplers.values())
    return sorted(merged)
def blacklist_samplers(loader):
    """Build one ``gr.update`` per sampler, toggling its visibility.

    The returned list follows the order of ``list_all_samplers()``. When
    ``loader`` is ``'All'`` every sampler is made visible; otherwise a sampler
    is visible only if it is listed under ``loaders_samplers[loader]``.

    Raises:
        KeyError: if ``loader`` is neither ``'All'`` nor a key of
            ``loaders_samplers``.
    """
    show_everything = loader == 'All'
    updates = []
    for name in list_all_samplers():
        # The table lookup is skipped entirely for 'All' (short-circuit), so
        # 'All' does not need to be a key of loaders_samplers.
        is_visible = show_everything or name in loaders_samplers[loader]
        updates.append(gr.update(visible=is_visible))

    return updates
def get_gpu_memory_keys():
    """Return the keys of ``shared.gradio`` that start with ``'gpu_memory'``.

    Keys are returned in the iteration order of ``shared.gradio``.
    """
    matching = []
    for key in shared.gradio:
        if key.startswith('gpu_memory'):
            matching.append(key)

    return matching
def get_all_params():
    """Return the sorted, de-duplicated parameter names of all loaders.

    The names come from every list in ``loaders_and_params``. If the
    ``'gpu_memory'`` placeholder is among them, it is replaced by the keys
    reported by ``get_gpu_memory_keys()``.
    """
    names = set().union(*loaders_and_params.values())

    if 'gpu_memory' in names:
        # Swap the placeholder for the concrete gpu_memory* element keys.
        names.discard('gpu_memory')
        names.update(get_gpu_memory_keys())

    return sorted(names)
def make_loader_params_visible(loader):
    """Build one ``gr.update`` per known parameter for the given loader.

    The returned list follows the order of ``get_all_params()``. A parameter
    is made visible when it is listed for ``loader`` in ``loaders_and_params``
    and hidden otherwise. The ``'gpu_memory'`` placeholder is expanded to the
    keys returned by ``get_gpu_memory_keys()``. A loader that is not a key of
    ``loaders_and_params`` hides every parameter.

    The module-level ``loaders_and_params`` table is never modified: the
    loader's entry is copied before the placeholder is expanded, so repeated
    calls always see the original table and the current gpu_memory* keys.
    """
    all_params = get_all_params()

    # Work on a private copy. Aliasing the list stored in loaders_and_params
    # and then removing/appending would permanently rewrite the shared table.
    visible = set(loaders_and_params[loader]) if loader in loaders_and_params else set()

    if 'gpu_memory' in visible:
        visible.discard('gpu_memory')
        visible.update(get_gpu_memory_keys())

    return [gr.update(visible=k in visible) for k in all_params]