import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download

# Set device to CPU
device = "cpu"

# Download the phi-2 model snapshot from the Hugging Face Hub
repo_id = 'amgadhasan/phi-2'
model_path = snapshot_download(repo_id=repo_id, repo_type="model", local_dir="./phi-2", use_auth_token=False)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Set default dtype to float32 for compatibility with CPU
torch.set_default_dtype(torch.float32)

model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)


def generate(prompt):
    # Tokenize the prompt, run generation, and decode the output back to text
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=200)
    completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return completion


def ask_question(user_question):
    if user_question.lower() == 'quit':
        return "Session ended. Goodbye!"
    else:
        # Here, we're explicitly setting the context for an academic answer.
        prompt = f"Academic response to the question about basic science subjects: {user_question}"
        answer = generate(prompt)
        return answer


# Expose the question-answering function through a simple Gradio text interface
iface = gr.Interface(fn=ask_question, inputs="text", outputs="text")
iface.launch(share=True)