Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	uu
Browse files
    	
        app.py
    CHANGED
    
    | @@ -2,30 +2,23 @@ import gradio as gr | |
| 2 | 
             
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
         | 
| 3 | 
             
            import torch
         | 
| 4 |  | 
| 5 | 
            -
            # Load model and tokenizer with trust_remote_code=True
         | 
| 6 | 
             
            model_id = "PowerInfer/SmallThinker-21BA3B-Instruct"
         | 
| 7 |  | 
| 8 | 
            -
            tokenizer = AutoTokenizer.from_pretrained(
         | 
| 9 | 
            -
                model_id,
         | 
| 10 | 
            -
                trust_remote_code=True  # Required for models with custom code
         | 
| 11 | 
            -
            )
         | 
| 12 | 
            -
             | 
| 13 | 
             
            model = AutoModelForCausalLM.from_pretrained(
         | 
| 14 | 
             
                model_id,
         | 
| 15 | 
            -
                device_map=" | 
| 16 | 
            -
                torch_dtype=torch. | 
| 17 | 
            -
                trust_remote_code=True | 
| 18 | 
             
            )
         | 
| 19 |  | 
| 20 | 
            -
            # Create text generation pipeline
         | 
| 21 | 
             
            generator = pipeline(
         | 
| 22 | 
             
                "text-generation",
         | 
| 23 | 
             
                model=model,
         | 
| 24 | 
             
                tokenizer=tokenizer,
         | 
| 25 | 
            -
                device | 
| 26 | 
             
            )
         | 
| 27 |  | 
| 28 | 
            -
            # Define the chat function
         | 
| 29 | 
             
            def chat(prompt, max_new_tokens=256, temperature=0.7):
         | 
| 30 | 
             
                output = generator(
         | 
| 31 | 
             
                    prompt,
         | 
| @@ -36,8 +29,8 @@ def chat(prompt, max_new_tokens=256, temperature=0.7): | |
| 36 | 
             
                )
         | 
| 37 | 
             
                return output[0]["generated_text"]
         | 
| 38 |  | 
| 39 | 
            -
            #  | 
| 40 | 
            -
            gr.Interface(
         | 
| 41 | 
             
                fn=chat,
         | 
| 42 | 
             
                inputs=[
         | 
| 43 | 
             
                    gr.Textbox(label="Prompt", lines=4, placeholder="Ask anything..."),
         | 
| @@ -46,5 +39,8 @@ gr.Interface( | |
| 46 | 
             
                ],
         | 
| 47 | 
             
                outputs=gr.Textbox(label="Response"),
         | 
| 48 | 
             
                title="💬 SmallThinker-21BA3B-Instruct",
         | 
| 49 | 
            -
                description="Run PowerInfer/SmallThinker-21BA3B-Instruct | 
| 50 | 
            -
            ) | 
|  | |
|  | |
|  | 
|  | |
| 2 | 
             
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
         | 
| 3 | 
             
            import torch
         | 
| 4 |  | 
|  | |
| 5 | 
             
            model_id = "PowerInfer/SmallThinker-21BA3B-Instruct"
         | 
| 6 |  | 
| 7 | 
            +
            tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
         | 
|  | |
|  | |
|  | |
|  | |
| 8 | 
             
            model = AutoModelForCausalLM.from_pretrained(
         | 
| 9 | 
             
                model_id,
         | 
| 10 | 
            +
                device_map="auto",            # Let HF decide best device
         | 
| 11 | 
            +
                torch_dtype=torch.float16,    # Use float16 for speed if GPU available
         | 
| 12 | 
            +
                trust_remote_code=True
         | 
| 13 | 
             
            )
         | 
| 14 |  | 
|  | |
| 15 | 
             
            generator = pipeline(
         | 
| 16 | 
             
                "text-generation",
         | 
| 17 | 
             
                model=model,
         | 
| 18 | 
             
                tokenizer=tokenizer,
         | 
| 19 | 
            +
                device=0 if torch.cuda.is_available() else -1
         | 
| 20 | 
             
            )
         | 
| 21 |  | 
|  | |
| 22 | 
             
            def chat(prompt, max_new_tokens=256, temperature=0.7):
         | 
| 23 | 
             
                output = generator(
         | 
| 24 | 
             
                    prompt,
         | 
|  | |
| 29 | 
             
                )
         | 
| 30 | 
             
                return output[0]["generated_text"]
         | 
| 31 |  | 
| 32 | 
            +
            # Define the interface but do NOT launch manually
         | 
| 33 | 
            +
            demo = gr.Interface(
         | 
| 34 | 
             
                fn=chat,
         | 
| 35 | 
             
                inputs=[
         | 
| 36 | 
             
                    gr.Textbox(label="Prompt", lines=4, placeholder="Ask anything..."),
         | 
|  | |
| 39 | 
             
                ],
         | 
| 40 | 
             
                outputs=gr.Textbox(label="Response"),
         | 
| 41 | 
             
                title="💬 SmallThinker-21BA3B-Instruct",
         | 
| 42 | 
            +
                description="Run PowerInfer/SmallThinker-21BA3B-Instruct"
         | 
| 43 | 
            +
            )
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            if __name__ == "__main__":
         | 
| 46 | 
            +
                demo.launch()
         |