"""
Custom inference handler for the Huseyin/tekno25 model.

A handler customized for Hugging Face Inference Endpoints.
"""
import logging
from typing import Any, Dict, List

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EndpointHandler:
    def __init__(self, path=""):
        """
        Load the model and tokenizer.

        Args:
            path: Directory containing the model files.
        """
        logger.info(f"Loading model: {path}")

        self.tokenizer = AutoTokenizer.from_pretrained(
            path,
            trust_remote_code=True
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

        # Many causal LMs ship without a pad token; fall back to EOS
        # so padded batches can be tokenized.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        logger.info("Model loaded successfully!")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Main entry point for the inference endpoint.

        Args:
            data: Request payload
                - inputs (str or List[str]): input text(s)
                - parameters (dict, optional): generation parameters

        Returns:
            List[Dict]: the generated text(s)
        """
        try:
            inputs = data.get("inputs", "")
            parameters = data.get("parameters", {})

            # Normalize a single string into a one-element batch.
            if isinstance(inputs, str):
                inputs = [inputs]

            default_params = {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "top_k": 50,
                "do_sample": True,
                "repetition_penalty": 1.1,
                "return_full_text": False
            }

            # Request parameters override the defaults.
            generation_params = {**default_params, **parameters}

            # `generate()` does not accept `return_full_text`; handle it here.
            return_full_text = generation_params.pop("return_full_text", False)

            results = []

            for text_input in inputs:
                encoded_inputs = self.tokenizer(
                    text_input,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=2048
                ).to(self.model.device)

                with torch.no_grad():
                    output_ids = self.model.generate(
                        **encoded_inputs,
                        **generation_params
                    )

                if return_full_text:
                    generated_text = self.tokenizer.decode(
                        output_ids[0],
                        skip_special_tokens=True
                    )
                else:
                    # Strip the prompt tokens so only the completion is returned.
                    input_length = encoded_inputs.input_ids.shape[1]
                    generated_text = self.tokenizer.decode(
                        output_ids[0][input_length:],
                        skip_special_tokens=True
                    )

                results.append({
                    "generated_text": generated_text
                })

            return results

        except Exception as e:
            logger.error(f"Error during inference: {str(e)}")
            return [{
                "error": str(e),
                "error_type": type(e).__name__
            }]


class PipelineHandler:
    """
    A simpler handler built on the Transformers pipeline.
    """

    def __init__(self, path=""):
        from transformers import pipeline

        logger.info(f"Loading pipeline: {path}")

        self.pipeline = pipeline(
            "text-generation",
            model=path,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

        logger.info("Pipeline loaded successfully!")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Pipeline-based inference.
        """
        try:
            inputs = data.get("inputs", "")
            parameters = data.get("parameters", {})

            default_params = {
                "max_new_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True,
                "return_full_text": False
            }

            # Request parameters override the defaults; the text-generation
            # pipeline accepts `return_full_text` directly.
            generation_params = {**default_params, **parameters}

            outputs = self.pipeline(
                inputs,
                **generation_params
            )

            return outputs

        except Exception as e:
            logger.error(f"Pipeline inference error: {str(e)}")
            return [{
                "error": str(e),
                "error_type": type(e).__name__
            }]
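

# --- Usage sketch -----------------------------------------------------------
# A minimal local smoke test, not part of the Inference Endpoints contract.
# The path "." and the prompt below are hypothetical placeholders; point
# `path` at a directory containing the model files before running.
if __name__ == "__main__":
    handler = EndpointHandler(path=".")  # hypothetical local model path
    request = {
        "inputs": "Merhaba, nasılsın?",  # placeholder prompt
        "parameters": {"max_new_tokens": 64, "temperature": 0.7},
    }
    # The handler returns one dict per input:
    # [{"generated_text": "..."}] on success,
    # [{"error": ..., "error_type": ...}] on failure.
    print(handler(request))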