Spaces:

ming512
/

llm

Runtime error

File size: 4,989 Bytes

16f428a

#@title 2. Launch the web UI

#@markdown If unsure about the branch, write "main" or leave it blank.

import torch
import os
from pathlib import Path

# Install text-generation-webui and its Python dependencies, unless this
# process is already running from inside a checkout of the repository.
if Path.cwd().name != 'text-generation-webui':
    print("Installing the webui...")

    os.system("git clone https://github.com/oobabooga/text-generation-webui")
    # Enter the checkout so that the relative paths used below
    # (requirements.txt, extensions/...) and later in the script
    # (download-model.py, server.py) resolve against the repository root.
    # A `cd` issued through os.system() would only affect the child shell,
    # so the working directory of this process is changed instead.
    os.chdir("text-generation-webui")

    torver = torch.__version__
    print(f"TORCH: {torver}")
    is_cuda118 = '+cu118' in torver  # 2.1.0+cu118
    is_cuda117 = '+cu117' in torver  # 2.0.1+cu117

    # Rewrite the CUDA / torch tags in the pinned requirements so that they
    # match the torch build that is already installed.
    with open('requirements.txt') as file:
        textgen_requirements = file.read().splitlines()
    if is_cuda117:
        textgen_requirements = [
            req.replace('+cu121', '+cu117').replace('+cu122', '+cu117').replace('torch2.1', 'torch2.0')
            for req in textgen_requirements
        ]
    elif is_cuda118:
        textgen_requirements = [
            req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
            for req in textgen_requirements
        ]
    with open('temp_requirements.txt', 'w') as file:
        file.write('\n'.join(textgen_requirements))

    os.system("pip install -r extensions/openai/requirements.txt --upgrade")
    os.system("pip install -r temp_requirements.txt --upgrade")

    print("\033[1;32;1m\n --> If you see a warning about \"previously imported packages\", just ignore it.\033[0;37;0m")
    print("\033[1;32;1m\n --> There is no need to restart the runtime.\n\033[0;37;0m")

    # If flash_attn cannot be imported, remove it so that a broken install
    # does not get picked up later.  `Exception` (rather than a bare except)
    # keeps KeyboardInterrupt / SystemExit propagating.
    try:
        import flash_attn
    except Exception:
        os.system("pip uninstall -y flash_attn")

# # Default Parameters
# model_url = "https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ" #@param {type:"string"}
# branch = "gptq-4bit-32g-actorder_True" #@param {type:"string"}
# command_line_flags = "--n-gpu-layers 128 --load-in-4bit --use_double_quant" #@param {type:"string"}
# api = True #@param {type:"boolean"}

# #=====================================Ming's customized Parameters =====================================

# Active configuration: Tiny-Vicuna-1B-GGUF (works; download is around 668 MB).
# The trailing `#@param` markers are Colab form annotations and are kept as-is.
#   model_url          - Hugging Face repository to download; a bare "user/repo"
#                        is prefixed with https://huggingface.co/ further down.
#   branch             - repository branch; "" or "main" selects the default.
#   command_line_flags - extra arguments appended to the server.py command line.
#   api                - when True, "--api" and "--public-api" are added to the flags.
# NOTE(review): --load-in-4bit / --use_double_quant look like transformers-loader
# options; confirm they are intended for a GGUF model.
model_url = "https://huggingface.co/afrideva/Tiny-Vicuna-1B-GGUF" #@param {type:"string"}
branch = "main" #@param {type:"string"}
command_line_flags = "--n-gpu-layers 128 --load-in-4bit --use_double_quant" #@param {type:"string"}
api = True #@param {type:"boolean"}

# # LinguaMatic-Tiny-GGUF model (around 699 MB) works as well
# model_url = "https://huggingface.co/erfanzar/LinguaMatic-Tiny-GGUF" #@param {type:"string"}
# branch = "main" #@param {type:"string"}
# command_line_flags = "" #@param {type:"string"}
# api = True #@param {type:"boolean"}

# # TinyLlama-1.1B-Chat-v0.3-GGUF model works as well (around 668 MB)
# model_url = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF" #@param {type:"string"}
# branch = "main" #@param {type:"string"}
# command_line_flags = "" #@param {type:"string"}
# api = True #@param {type:"boolean"}




# # phanerozoic-Tiny-Pirate-1.1b-v0.1-GGUF (does not work)
# model_url = "https://huggingface.co/s3nh/phanerozoic-Tiny-Pirate-1.1b-v0.1-GGUF" #@param {type:"string"}
# branch = "main" #@param {type:"string"}
# command_line_flags = "" #@param {type:"string"}
# api = True #@param {type:"boolean"}

# # tinyllamas-stories-gguf (does not work)
# model_url = "https://huggingface.co/klosax/tinyllamas-stories-gguf" #@param {type:"string"}
# branch = "main" #@param {type:"string"}
# command_line_flags = "" #@param {type:"string"}
# api = True #@param {type:"boolean"}

# # tinyllamas (does not work)
# model_url = "https://huggingface.co/karpathy/tinyllamas" #@param {type:"string"}
# branch = "main" #@param {type:"string"}
# command_line_flags = "" #@param {type:"string"}
# api = True #@param {type:"boolean"}

# # llama-tiny-Synthetic-therapist-GGUF (does not work)
# model_url = "https://huggingface.co/wesley7137/llama-tiny-Synthetic-therapist-GGUF" #@param {type:"string"}
# branch = "main" #@param {type:"string"}
# command_line_flags = "" #@param {type:"string"}
# api = True #@param {type:"boolean"}




# When the API is enabled, make sure both API switches are on the command line.
# Whole tokens are compared: a substring test would treat e.g.
# "--api-port 5000" as already containing "--api" and silently skip the flag.
if api:
    for param in ['--api', '--public-api']:
        if param not in command_line_flags.split():
            command_line_flags += f" {param}"

# Download the requested model (if any) and derive the folder name that is
# later passed to server.py via --model.  An empty model_url skips the
# download and leaves output_folder empty.
model_url = model_url.strip()
if model_url != "":
    # Accept a bare "user/repo" as shorthand for the full Hugging Face URL.
    if not model_url.startswith('http'):
        model_url = 'https://huggingface.co/' + model_url

    # Folder name is "<user>_<repo>", plus "_<branch>" for non-default
    # branches (presumably matching what download-model.py creates — verify).
    url_parts = model_url.strip('/').strip().split('/')
    output_folder = f"{url_parts[-2]}_{url_parts[-1]}"
    branch = branch.strip('"\' ')
    if branch.strip() not in ['', 'main']:
        output_folder += f"_{branch}"
        # f-string is required here: without it the literal text
        # "{model_url}" / "{branch}" would be handed to the shell.
        os.system(f"python download-model.py {model_url} --branch {branch}")
    else:
        os.system(f"python download-model.py {model_url}")
else:
    output_folder = ""

# Start the web UI: assemble the server.py command line from the downloaded
# model folder (if any) and the user-supplied flags, echo it, then run it.
cmd = "python server.py --share"
if output_folder != "":
    cmd += f" --model {output_folder}"
cmd += f" {command_line_flags}"
print(cmd)
# The notebook executed this via the `!$cmd` shell magic, which is not
# available in a plain Python script; run it through the shell explicitly.
os.system(cmd)