| | from ctransformers import AutoConfig, AutoModelForCausalLM |
| |
|
| | from modules import shared |
| | from modules.callbacks import Iteratorize |
| | from modules.logging_colors import logger |
| |
|
| |
|
class CtransformersModel:
    """Wrapper around a ctransformers causal language model.

    The wrapped model is stored on ``self.model`` and is used for
    tokenization (``encode``/``decode``) and for text generation
    (``generate``/``generate_with_streaming``).
    """

    def __init__(self):
        pass

    @classmethod
    def from_pretrained(cls, path):
        """Load a model from *path* using the settings in ``shared.args``.

        Args:
            path: Location of the model. It is converted with ``str()``
                before being handed to ctransformers, and ``model_dir``
                calls ``path.is_file()`` on it, so a ``pathlib.Path``-like
                object is expected.

        Returns:
            A 2-tuple holding the same ``CtransformersModel`` instance twice.
            NOTE(review): presumably the caller unpacks this as
            ``(model, tokenizer)`` — confirm against the loader.
        """
        result = cls()

        config = AutoConfig.from_pretrained(
            str(path),
            # A thread count of 0 in the arguments is forwarded as -1.
            threads=shared.args.threads if shared.args.threads != 0 else -1,
            gpu_layers=shared.args.n_gpu_layers,
            batch_size=shared.args.n_batch,
            context_length=shared.args.n_ctx,
            stream=True,
            mmap=not shared.args.no_mmap,
            mlock=shared.args.mlock
        )

        # Evaluate once: the answer drives both the path and the model_type.
        auto_type = result.model_type_is_auto()

        # With no explicit model type, hand over the containing directory
        # and model_type=None; otherwise pass the path and the type as given.
        result.model = AutoModelForCausalLM.from_pretrained(
            str(result.model_dir(path) if auto_type else path),
            model_type=(None if auto_type else shared.args.model_type),
            config=config
        )

        logger.info(f'Using ctransformers model_type: {result.model.model_type} for {result.model.model_path}')
        return result, result

    def model_type_is_auto(self):
        """Return True when ``shared.args.model_type`` names no concrete type.

        That is the case when it is ``None`` or one of the strings
        ``"Auto"`` / ``"None"``.
        """
        return shared.args.model_type in (None, "Auto", "None")

    def model_dir(self, path):
        """Return the parent directory if *path* is a file, else *path* itself."""
        if path.is_file():
            return path.parent

        return path

    def encode(self, string, **kwargs):
        """Tokenize *string* with the wrapped model.

        Extra keyword arguments are accepted and ignored.
        """
        return self.model.tokenize(string)

    def decode(self, ids):
        """Turn token *ids* back into text with the wrapped model."""
        return self.model.detokenize(ids)

    def generate(self, prompt, state, callback=None):
        """Generate a completion for *prompt* and return it as one string.

        Args:
            prompt: The prompt as ``str`` (including subclasses); any other
                object is converted by calling its ``decode()`` method
                (e.g. ``bytes``).
            state: Mapping that supplies ``max_new_tokens``, ``temperature``,
                ``top_p``, ``top_k``, ``repetition_penalty``,
                ``repetition_penalty_range`` and ``seed``.
            callback: Optional callable invoked with each generated token
                as soon as it is produced.

        Returns:
            The concatenation of all generated tokens.
        """
        # isinstance also accepts str subclasses, which have no decode().
        if not isinstance(prompt, str):
            prompt = prompt.decode()

        generator = self.model(
            prompt=prompt,
            max_new_tokens=state['max_new_tokens'],
            temperature=state['temperature'],
            top_p=state['top_p'],
            top_k=state['top_k'],
            repetition_penalty=state['repetition_penalty'],
            last_n_tokens=state['repetition_penalty_range'],
            seed=int(state['seed'])
        )

        # Collect the pieces and join once instead of repeated string +=.
        pieces = []
        for token in generator:
            if callback:
                callback(token)

            pieces.append(token)

        return "".join(pieces)

    def generate_with_streaming(self, *args, **kwargs):
        """Yield the cumulative reply after every generated token.

        Arguments are forwarded to ``generate`` through ``Iteratorize``.
        NOTE(review): presumably Iteratorize turns the callback-driven
        ``generate`` into an iterator of tokens — confirm in
        ``modules.callbacks``.
        """
        with Iteratorize(self.generate, args, kwargs, callback=None) as generator:
            reply = ''
            for token in generator:
                reply += token
                yield reply
| |
|