"""Summarize Telugu text with the hosted Telugu-Llama2-7B-v0-Instruct model.

The model is queried over HTTP through LangChain's
``HuggingFaceTextGenInference`` client pointed at the Hugging Face Inference
API. The access token is read from the ``huggingface_token`` environment
variable.

An earlier revision loaded the same model locally with ``transformers``
(``AutoTokenizer`` / ``AutoModelForCausalLM`` in bfloat16 on CUDA when
available); that path is not used here.
"""

import os

from langchain_community.llms import HuggingFaceHub  # noqa: F401  (unused here; import kept)
from langchain_community.llms import HuggingFaceTextGenInference

ENDPOINT_URL = "https://api-inference.huggingface.co/models/Telugu-LLM-Labs/Telugu-Llama2-7B-v0-Instruct"
HF_TOKEN = os.getenv("huggingface_token")

# Telugu instruction prepended to every input ("Summarize the following text: ").
SUMMARIZE_INSTRUCTION = "కింది వచనాన్ని సంగ్రహించండి: "


def build_llm(token=HF_TOKEN, endpoint_url=ENDPOINT_URL):
    """Create the text-generation client for the hosted Telugu model.

    Args:
        token: Hugging Face API token; defaults to the value of the
            ``huggingface_token`` environment variable read at import time.
        endpoint_url: URL of the inference endpoint to call.

    Returns:
        A configured ``HuggingFaceTextGenInference`` instance.

    Raises:
        RuntimeError: If no token is available. Without this check the
            request would be sent with the literal header ``Bearer None``.
    """
    if not token:
        raise RuntimeError(
            "Hugging Face token not found: set the 'huggingface_token' "
            "environment variable."
        )
    return HuggingFaceTextGenInference(
        inference_server_url=endpoint_url,
        max_new_tokens=512,
        top_k=50,
        temperature=0.1,
        repetition_penalty=1.03,
        server_kwargs={
            "headers": {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            }
        },
    )


def summarize(text, llm):
    """Ask ``llm`` to summarize ``text`` and return its response.

    The prompt is the fixed Telugu instruction followed directly by ``text``.

    Args:
        text: The text to summarize.
        llm: A LangChain LLM (anything exposing ``invoke(prompt)``) or a
            plain callable that accepts the prompt string.

    Returns:
        Whatever ``llm`` returns for the prompt.
    """
    prompt = SUMMARIZE_INSTRUCTION + text
    # Calling a LangChain LLM object directly is deprecated in favour of
    # ``invoke``; fall back to a direct call so plain callables keep working.
    invoke = getattr(llm, "invoke", None)
    if callable(invoke):
        return invoke(prompt)
    return llm(prompt)


def main():
    """Summarize a sample Telugu passage and print the result."""
    input_text = "గూగుల్ వార్తలు అనేది గూగుల్ ద్వారా అభివృద్ధి చేయబడిన వార్తా అగ్రిగేటర్ సేవ..."
    result = summarize(input_text, build_llm())
    print(result)


if __name__ == "__main__":
    main()