Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -10,29 +10,29 @@ from transformers import ( 
     | 
|
| 10 | 
         
             
            )
         
     | 
| 11 | 
         
             
            from threading import Thread
         
     | 
| 12 | 
         | 
| 13 | 
         
            -
             
     | 
| 14 | 
         
            -
             
     | 
| 15 | 
         | 
| 16 | 
         
            -
             
     | 
| 17 | 
         
            -
             
     | 
| 18 | 
         
            -
             
     | 
| 19 | 
         
            -
             
     | 
| 20 | 
         
            -
             
     | 
| 21 | 
         | 
| 22 | 
         
            -
            model_name_or_path = "TheBloke/phi-2-GPTQ"
         
     | 
| 23 | 
         
            -
             
     | 
| 24 | 
         
            -
             
     | 
| 25 | 
         | 
| 26 | 
         
            -
            config = AutoConfig.from_pretrained(model_name_or_path,trust_remote_code=True)
         
     | 
| 27 | 
         
            -
            config.quantization_config["use_exllama"] = False
         
     | 
| 28 | 
         | 
| 29 | 
         
            -
            model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
         
     | 
| 30 | 
         
            -
                                                         device_map="cpu",
         
     | 
| 31 | 
         
            -
                                                         trust_remote_code=True,
         
     | 
| 32 | 
         
            -
                                                         revision="main",
         
     | 
| 33 | 
         
            -
                                                         config=config)
         
     | 
| 34 | 
         | 
| 35 | 
         
            -
            tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
         
     | 
| 36 | 
         | 
| 37 | 
         | 
| 38 | 
         
             
            # Text generation pipeline
         
     | 
| 
         | 
|
| 10 | 
         
             
            )
         
     | 
| 11 | 
         
             
            from threading import Thread
         
     | 
| 12 | 
         | 
| 13 | 
         
            +
            # The Hugging Face model ID for Microsoft's phi-2 model
         
     | 
| 14 | 
         
            +
            checkpoint = "microsoft/phi-2"
         
     | 
| 15 | 
         | 
| 16 | 
         
            +
            # Download and load the tokenizer and the model
         
     | 
| 17 | 
         
            +
            tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
         
     | 
| 18 | 
         
            +
            model = AutoModelForCausalLM.from_pretrained(
         
     | 
| 19 | 
         
            +
                checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
         
     | 
| 20 | 
         
            +
            )
         
     | 
| 21 | 
         | 
| 22 | 
         
            +
            #model_name_or_path = "TheBloke/phi-2-GPTQ"
         
     | 
| 23 | 
         
            +
            ## To use a different branch, change revision
         
     | 
| 24 | 
         
            +
            ## For example: revision="gptq-4bit-32g-actorder_True"
         
     | 
| 25 | 
         | 
| 26 | 
         
            +
            #config = AutoConfig.from_pretrained(model_name_or_path,trust_remote_code=True)
         
     | 
| 27 | 
         
            +
            #config.quantization_config["use_exllama"] = False
         
     | 
| 28 | 
         | 
| 29 | 
         
            +
            #model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
         
     | 
| 30 | 
         
            +
            #                                             device_map="cpu",
         
     | 
| 31 | 
         
            +
            #                                             trust_remote_code=True,
         
     | 
| 32 | 
         
            +
            #                                             revision="main",
         
     | 
| 33 | 
         
            +
            #                                             config=config)
         
     | 
| 34 | 
         | 
| 35 | 
         
            +
            #tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
         
     | 
| 36 | 
         | 
| 37 | 
         | 
| 38 | 
         
             
            # Text generation pipeline
         
     |