ChatGLM-6B

Runtime error

jodh-intel committed on Jun 4, 2024

Commit

1442fb8

1 Parent(s): cf0a724

ui: Add system features

Add a list of interesting detected system features, plus an expandable
table (hidden by default) that includes more debug information.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>

Files changed (2) hide show

app.py +168 -8
requirements.txt +13 -2

app.py CHANGED Viewed

@@ -1,35 +1,195 @@
 from transformers import AutoModel, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM
 import gradio as gr
 import torch
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-tokenizer = LlamaTokenizer.from_pretrained("lmsys/vicuna-7b-v1.3", trust_remote_code=True)
-model = LlamaForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.3", trust_remote_code=True).to(DEVICE)
 model = model.eval()
 def predict(input, history=None):
     if history is None:
         history = []
-    new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors='pt')
     bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
-    history = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id).tolist()
     # convert the tokens to text, and then split the responses into the right format
     response = tokenizer.decode(history[0]).split("<|endoftext|>")
-    response = [(response[i], response[i+1]) for i in range(0, len(response)-1, 2)]  # convert to tuples of list
     return response, history
 with gr.Blocks() as demo:
-    gr.Markdown('''## Confidential HuggingFace Runner
-    ''')
     state = gr.State([])
     chatbot = gr.Chatbot([], elem_id="chatbot").style(height=400)
     with gr.Row():
         with gr.Column(scale=4):
-            txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)
         with gr.Column(scale=1):
             button = gr.Button("Generate")
     txt.submit(predict, [txt, state], [chatbot, state])
     button.click(predict, [txt, state], [chatbot, state])
 demo.queue().launch(share=True, server_name="0.0.0.0")

 from transformers import AutoModel, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM
 import gradio as gr
 import torch
+import os
+import io
+import sys
+import platform
+import intel_extension_for_pytorch as ipex
+import intel_extension_for_pytorch._C as ipex_core
+from cpuinfo import get_cpu_info
+from contextlib import redirect_stdout
+# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Paths stat'ed by in_chroot().
+ROOT = '/'
+SELF_ROOT = '/proc/self/root'
+# Load the tokenizer and the model for the "lmsys/vicuna-7b-v1.3" checkpoint
+# at import time; the model is moved to DEVICE.
+tokenizer = LlamaTokenizer.from_pretrained(
+    "lmsys/vicuna-7b-v1.3", trust_remote_code=True
+)
+model = LlamaForCausalLM.from_pretrained(
+    "lmsys/vicuna-7b-v1.3", trust_remote_code=True
+).to(DEVICE)
+# The model is only used for generation, so switch it to evaluation mode.
 model = model.eval()
+def in_chroot():
+    '''
+    Return True if the process appears to be running in a chroot environment.
+
+    Stats ROOT and SELF_ROOT. The process is reported as chrooted when the
+    inode number of ROOT is not one of the well-known root inode numbers,
+    or when ROOT and SELF_ROOT do not refer to the same file.
+
+    Exits the program via sys.exit() if either path cannot be stat'ed.
+    '''
+    try:
+        root_stat = os.stat(ROOT)
+        self_stat = os.stat(SELF_ROOT)
+    except OSError as e:
+        # OSError also covers PermissionError and friends, which would
+        # otherwise escape as an unhandled traceback.
+        sys.exit(f"ERROR: Failed to stat: {e}")
+    # Inode 2 is the root inode for most filesystems.
+    # However, XFS uses 128 for root.
+    if root_stat.st_ino not in (2, 128):
+        return True
+    # Inode numbers are only unique within a single filesystem, so compare
+    # the device number as well as the inode number.
+    return not os.path.samestat(root_stat, self_stat)
+def get_features():
+    '''
+    Return a dictionary of all features:
+    key: feature name.
+    value: Boolean showing if feature available.
+
+    The CPU related entries are derived from the CPU flags reported by
+    get_cpu_info(); the two "AMX IPEX" entries come from the ISA levels
+    reported by the IPEX core module.
+    '''
+    cpu_info = get_cpu_info()
+    # Treat a missing or empty flags entry as "no flags detected" rather
+    # than failing with KeyError. A set makes the repeated membership
+    # tests below cheap.
+    flags = frozenset(cpu_info.get("flags") or ())
+    ipex_max_isa_level = ipex_core._get_highest_cpu_support_isa_level()
+    ipex_current_isa_level = ipex_core._get_current_isa_level()
+    return {
+        'VM': 'hypervisor' in flags,
+        'TDX TD': 'tdx_guest' in flags,
+        'AMX available': 'amx_tile' in flags,
+        'AMX-BF16 available': 'amx_bf16' in flags,
+        'AMX-INT8 available': 'amx_int8' in flags,
+        'AVX-VNNI available': 'avx_vnni' in flags,
+        'AVX512-VNNI available': 'avx512_vnni' in flags,
+        'AVX512-FP16 available': 'avx512_fp16' in flags,
+        'AVX512-BF16 available': 'avx512_bf16' in flags,
+        'AMX IPEX available': ipex_max_isa_level == 'AMX',
+        'AMX IPEX enabled': ipex_current_isa_level == 'AMX',
+    }
+def get_debug_details():
+    '''
+    Build the markdown shown in the collapsible "debug details" section.
+
+    The returned text is a `<details>` element wrapping a two-column table
+    of CPU, Python, PyTorch, IPEX and oneDNN information, plus the result
+    of the chroot check.
+    '''
+    # ipex.version() writes its report to stdout instead of returning it,
+    # so capture stdout while it runs.
+    captured = io.StringIO()
+    with redirect_stdout(captured):
+        ipex.version()
+    # Flatten the multi-line report so it fits in a single table cell.
+    version_summary = ", ".join(captured.getvalue().split("\n"))
+    current_isa = ipex_core._get_current_isa_level()
+    max_isa = ipex_core._get_highest_cpu_support_isa_level()
+    aten_capability = os.environ.get('ATEN_CPU_CAPABILITY')
+    onednn_max_isa = os.environ.get('ONEDNN_MAX_CPU_ISA')
+    chrooted = in_chroot()
+    cpu = get_cpu_info()
+    cpu_flags = cpu["flags"]
+    # Note that rather than using `<details>`, we could use gradio.Accordion(),
+    # but the markdown version is more visually compact.
+    return f"""
+    <details>
+      <summary>Click to show debug details</summary>
+      | Feature | Value |
+      |-|-|
+      | Arch | `{cpu['arch']}` |
+      | CPU | `{cpu['brand_raw']}` |
+      | CPU flags | `{cpu_flags}` |
+      | Python version | `{sys.version}` (implementation: `{platform.python_implementation()}`) |
+      | Python version details | `{sys.version_info}` |
+      | PyTorch version | `{torch.__version__}` |
+      | IPEX version | `{ipex.ipex_version}` |
+      | IPEX CPU detected | `{ipex_core._has_cpu()}` |
+      | IPEX XPU detected | `{ipex_core._has_xpu()}` |
+      | IPEX version details | `{version_summary}` |
+      | IPEX env var `ATEN_CPU_CAPABILITY` | `{aten_capability}` |
+      | IPEX current ISA level | `{current_isa}` |
+      | IPEX max ISA level | `{max_isa}` |
+      | oneDNN env var `ONEDNN_MAX_CPU_ISA` | `{onednn_max_isa}` |
+      | in chroot | `{chrooted}` |
+    </details>
+    """
 def predict(input, history=None):
+    '''
+    Generate the next chat turn for the text in `input`.
+
+    `history` holds the token ids of the conversation so far, in the nested
+    list form returned by a previous call (None or empty for a new chat).
+
+    Returns a `(response, history)` tuple: `response` is a list of 2-tuples
+    of decoded text segments, paired up in order (presumably user turn,
+    model turn), and `history` is the updated nested list of token ids.
+    '''
     if history is None:
         history = []
+    # Each turn is terminated with the tokenizer's end-of-sequence token.
+    new_user_input_ids = tokenizer.encode(
+        input + tokenizer.eos_token, return_tensors='pt'
+    )
+    # The input ids must live on the same device as the model weights,
+    # otherwise generate() fails when the model was moved to the GPU.
+    bot_input_ids = torch.cat(
+        [torch.LongTensor(history), new_user_input_ids], dim=-1
+    ).to(model.device)
+    history = model.generate(
+        bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id
+    ).tolist()
     # convert the tokens to text, and then split the responses into the right format
+    # Split on the same end-of-sequence token that was used to delimit the
+    # turns above, instead of a hard-coded token that may not match it.
+    response = tokenizer.decode(history[0]).split(tokenizer.eos_token)
+    response = [
+        (response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)
+    ]  # convert to a list of tuples
     return response, history
+# Build the web UI: a heading, the chat area, a read-only list of detected
+# features and a collapsible table of debug details.
 with gr.Blocks() as demo:
+    gr.Markdown(
+        '''## Confidential HuggingFace Runner
+    '''
+    )
+    # Holds the token id history that predict() receives and returns.
     state = gr.State([])
     chatbot = gr.Chatbot([], elem_id="chatbot").style(height=400)
     with gr.Row():
         with gr.Column(scale=4):
+            txt = gr.Textbox(
+                show_label=False, placeholder="Enter text and press enter"
+            ).style(container=False)
         with gr.Column(scale=1):
             button = gr.Button("Generate")
+    # Submitting the textbox and clicking the button both run predict().
     txt.submit(predict, [txt, state], [chatbot, state])
     button.click(predict, [txt, state], [chatbot, state])
+    with gr.Row():
+        features_dict = get_features()
+        all_features = features_dict.keys()
+        # Get a list of feature names that are actually set/available
+        set_features = [key for key in features_dict if features_dict[key]]
+        gr.CheckboxGroup(
+            all_features,
+            label="Features",
+            # Make the boxes read-only
+            interactive=False,
+            # Specify which features were detected
+            value=set_features,
+            info="Features detected from environment",
+        )
+    with gr.Row():
+        # The debug details are computed once, while the UI is being built.
+        debug_details = get_debug_details()
+        gr.Markdown(debug_details)
+# Enable request queuing and start the server on all network interfaces,
+# also requesting a share link.
 demo.queue().launch(share=True, server_name="0.0.0.0")

requirements.txt CHANGED Viewed

@@ -1,6 +1,17 @@
-torch
 cpm_kernels
 icetk
 gradio==3.50.2
 accelerate
-git+https://github.com/huggingface/transformers

+# For pytorch
+--find-links https://download.pytorch.org/whl/torch_stable.html
+# For ipex
+--trusted-host pytorch-extension.intel.com
+--extra-index-url http://pytorch-extension.intel.com/release-whl/stable/cpu/us/intel-extension-for-pytorchtorch
 cpm_kernels
 icetk
 gradio==3.50.2
 accelerate
+git+https://github.com/huggingface/transformers
+py-cpuinfo
+# Versions must match
+torch==2.3.0+cpu
+intel-extension-for-pytorch==2.3.0