| from huggingface_hub import InferenceClient | |
class Qwen2_5:
    """Thin wrapper around the Hugging Face Inference API for the
    Qwen2.5-72B-Instruct chat model.

    Provides a streaming generator (`generate_stream`) and a blocking
    convenience method (`generate`) that returns the full completion.
    """

    def __init__(self, token):
        # Authenticated API client; the model is selected per request
        # rather than being bound to the client itself.
        self.client = InferenceClient(token=token)
        self.model_id = "Qwen/Qwen2.5-72B-Instruct"

    def generate_stream(self, prompt, max_tokens=1000, temperature=0.1):
        """Yield text fragments of the model's reply as they arrive.

        The prompt is sent as a single user message; empty stream chunks
        (no choices, or a delta with no content) are skipped.
        """
        stream = self.client.chat_completion(
            model=self.model_id,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
        )
        for chunk in stream:
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                yield piece

    def generate(self, prompt, max_tokens=1000, temperature=0.1):
        """Return the complete reply as one string by draining the stream."""
        parts = self.generate_stream(
            prompt, max_tokens=max_tokens, temperature=temperature
        )
        return "".join(parts)