#import torch
#from transformers import AutoTokenizer, AutoModelForCausalLM

import os

from langchain_community.llms import HuggingFaceHub
from langchain_community.llms import HuggingFaceTextGenInference

# Local model loading (disabled): kept for reference as an inert string; the hosted endpoint below is used instead.
""" device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = "Telugu-LLM-Labs/Telugu-Llama2-7B-v0-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device) """


# Hosted inference endpoint for the Telugu Llama-2 instruct model.
ENDPOINT_URL = "https://api-inference.huggingface.co/models/Telugu-LLM-Labs/Telugu-Llama2-7B-v0-Instruct"
# API token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("huggingface_token")

# Only send an Authorization header when a token is actually configured:
# formatting a missing token would otherwise produce the literal header
# value "Bearer None".
_headers = {"Content-Type": "application/json"}
if HF_TOKEN:
    _headers["Authorization"] = f"Bearer {HF_TOKEN}"

# Text-generation client shared by the rest of the script.
llm = HuggingFaceTextGenInference(
    inference_server_url=ENDPOINT_URL,
    max_new_tokens=512,
    top_k=50,
    temperature=0.1,
    repetition_penalty=1.03,
    server_kwargs={"headers": _headers},
)

def summarize(text: str, llm):
    """Ask ``llm`` for a summary of ``text``.

    The text is appended to a fixed Telugu instruction (roughly
    "Summarize the following text: ") and the combined prompt is sent to
    the model.

    Args:
        text: The passage to summarize.
        llm: The model to query. Either an object exposing
            ``invoke(prompt)`` (such as a LangChain LLM) or any plain
            callable that accepts the prompt string.

    Returns:
        Whatever the model returns for the prompt, unmodified.
    """
    instruction = "కింది వచనాన్ని సంగ్రహించండి: "
    prompt = instruction + text
    # Calling a LangChain LLM directly (``llm(prompt)``) is deprecated in
    # favour of ``invoke``; fall back to a direct call so plain callables
    # passed by existing callers keep working.
    generate = getattr(llm, "invoke", llm)
    return generate(prompt)

# Demo run: summarize a sample Telugu passage (a description of Google News)
# with the module-level client and print whatever the model returns.
input_text = (
    "గూగుల్ వార్తలు అనేది గూగుల్ ద్వారా అభివృద్ధి చేయబడిన వార్తా అగ్రిగేటర్ సేవ..."
)
result = summarize(text=input_text, llm=llm)
print(result)