gclone-4125-x2

Sleeping

App Files Files Community

jacobfrye committed on Aug 20, 2023

Commit

8e1cd4f

•

1 Parent(s): 40ffe6d

Update UI

Browse files

Files changed (1) hide show

app.py +122 -58

app.py CHANGED Viewed

@@ -1,55 +1,146 @@
 import gradio as gr
-from llm_rs import AutoModel, SessionConfig, GenerationConfig, Precision, KnownModels
-# https://huggingface.co/TheBloke/open-llama-13b-open-instruct-GGML/blob/main/open-llama-13b-open-instruct.ggmlv3.q8_0.bin
-repo_name = "TheBloke/open-llama-13b-open-instruct-GGML"
-file_name = "open-llama-13b-open-instruct.ggmlv3.q8_0.bin"
-examples = [
-    "Write a travel blog about a 3-day trip to Thailand.",
-    "Tell me a short story about a robot that has a nice day.",
-    "Compose a tweet to congratulate rustformers on the launch of their HuggingFace Space.",
-    "Explain how a candle works to a 6-year-old in a few sentences.",
-    "What are some of the most common misconceptions about birds?",
-    "Explain why the Rust programming language is so popular.",
-]
 session_config = SessionConfig(threads=2,batch_size=2)
-model = AutoModel.from_pretrained(repo_name, model_file=file_name, model_type=KnownModels.Llama, session_config=session_config,verbose=True)
-def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
     prompt=f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
-{instruction}
 ### Response:
 Answer:"""
-    generation_config = GenerationConfig(seed=seed,temperature=temperature,top_p=top_p,top_k=top_k,max_new_tokens=max_new_tokens)
     response = ""
-    streamer = model.stream(prompt=prompt,generation_config=generation_config)
-    for new_text in streamer:
-        response += new_text
         yield response
 with gr.Blocks(
     theme=gr.themes.Soft(),
     css=".disclaimer {font-variant-caps: all-small-caps;}",
 ) as demo:
     gr.Markdown(
-        """<h1><center>MPT-7B-Instruct on CPU in Rust 🦀</center></h1>
-        This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) to execute [MPT-7B-Instruct](https://huggingface.co/mosaicml/mpt-7b-instruct) on 2 CPU cores.
         """
     )
     with gr.Row():
         with gr.Column():
-            with gr.Row():
-                instruction = gr.Textbox(
-                    placeholder="Enter your question or instruction here",
-                    label="Question/Instruction",
-                    elem_id="q-input",
-                )
             with gr.Accordion("Advanced Options:", open=False):
                 with gr.Row():
                     with gr.Column():
@@ -113,39 +204,12 @@ with gr.Blocks(
         submit = gr.Button("Submit")
     with gr.Row():
         with gr.Box():
-            gr.Markdown("**MPT-7B-Instruct**")
             output_7b = gr.Markdown()
-    with gr.Row():
-        gr.Examples(
-            examples=examples,
-            inputs=[instruction],
-            cache_examples=False,
-            fn=process_stream,
-            outputs=output_7b,
-        )
-    with gr.Row():
-        gr.Markdown(
-            "Disclaimer: MPT-7B can produce factually incorrect output, and should not be relied on to produce "
-            "factually accurate information. MPT-7B was trained on various public datasets; while great efforts "
-            "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
-            "biased, or otherwise offensive outputs.",
-            elem_classes=["disclaimer"],
-        )
-    with gr.Row():
-        gr.Markdown(
-            "[Privacy policy](https://gist.github.com/samhavens/c29c68cdcd420a9aa0202d0839876dac)",
-            elem_classes=["disclaimer"],
-        )
     submit.click(
         process_stream,
-        inputs=[instruction, temperature, top_p, top_k, max_new_tokens,seed],
-        outputs=output_7b,
-    )
-    instruction.submit(
-        process_stream,
-        inputs=[instruction, temperature, top_p, top_k, max_new_tokens,seed],
         outputs=output_7b,
     )

+import sqlite3
 import gradio as gr
+from hashlib import md5 as hash_algo
+from re import match
+from io import BytesIO
+from pypdf import PdfReader
+from llm_rs import AutoModel,SessionConfig,GenerationConfig,Precision
+repo_name = "rustformers/mpt-7b-ggml"
+file_name = "mpt-7b-instruct-q5_1-ggjt.bin"
+script_env = 'prod'
 session_config = SessionConfig(threads=2,batch_size=2)
+model = AutoModel.from_pretrained(repo_name, model_file=file_name, session_config=session_config,verbose=True)
+def process_stream(rules, log, temperature, top_p, top_k, max_new_tokens, seed):
+    con = sqlite3.connect("history.db")
+    cur = con.cursor()
+    instruction = ''
+    hashes = []
+    if type(rules) is not list:
+        rules = [rules]
+    for rule in rules:
+        data, hash = get_file_contents(rule)
+        instruction += data + '\n'
+        hashes.append(hash)
+    hashes.sort()
+    hashes = hash_algo(''.join(hashes).encode()).hexdigest()
+    largest = 0
+    lines = instruction.split('\r\n')
+    if len(lines) == 1:
+        lines = instruction.split('\n')
+    for line in lines:
+        m = match('^(\d+)\.', line)
+        if m != None:
+            num = int(line[m.start():m.end()-1])
+            if num > largest:
+                largest = num
+    instruction += str(largest + 1) + '. '
+    query, hash = get_file_contents(log)
+    hashes = hash_algo((hashes + hash).encode()).hexdigest()
+    instruction = instruction.replace('\r\r\n', '\n')
     prompt=f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
+A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.
+Q: Read the rules stated below and check the queries for any violation. State the rules which are violated by a query (if any). Also suggest a possible remediation, if possible. Do not make any assumptions outside of the rules stated below.
+{instruction}The queries are as follows:
+{query}
+A:
 ### Response:
 Answer:"""
     response = ""
+    row = cur.execute('SELECT response FROM queries WHERE hexdigest = ?', [hashes]).fetchone()
+    if row != None:
+        response += "Cached Result:\n" + row[0]
         yield response
+    else:
+        if script_env != 'test':
+            generation_config = GenerationConfig(seed=seed,temperature=temperature,top_p=top_p,top_k=top_k,max_new_tokens=max_new_tokens)
+            streamer = model.stream(prompt=prompt,generation_config=generation_config)
+            for new_text in streamer:
+                response += new_text
+                yield response
+        else:
+            num = 0
+            while num < 100:
+                response += " " + str(num)
+                num += 1
+                yield response
+        cur.execute('INSERT INTO queries VALUES(?, ?)', (hashes, response))
+        con.commit()
+    cur.close()
+    con.close()
+def get_file_contents(file):
+    data = None
+    byte_hash = ''
+    with open(file.name, 'rb') as f:
+        data = f.read()
+        byte_hash = hash_algo(data).hexdigest()
+    if file.name.endswith('.pdf'):
+        rdr = PdfReader(BytesIO(data))
+        data = ''
+        for page in rdr.pages:
+            data += page.extract_text()
+    else:
+        data = data.decode()
+        if file.name.endswith(".csv"):
+            data = data.replace(',', ' ')
+    return (data, byte_hash)
+def upload_log_file(files):
+    file_paths = [file.name for file in files]
+    return file_paths
+def upload_file(files):
+    file_paths = [file.name for file in files]
+    return file_paths
 with gr.Blocks(
     theme=gr.themes.Soft(),
     css=".disclaimer {font-variant-caps: all-small-caps;}",
 ) as demo:
     gr.Markdown(
+        """<h1><center>Grid 5.0 Information Security Track</center></h1>
         """
     )
+    rules = gr.File(file_count="multiple")
+    upload_button = gr.UploadButton("Click to upload a new Compliance Document", file_types=[".txt", ".pdf"], file_count="multiple")
+    upload_button.upload(upload_file, upload_button, rules)
     with gr.Row():
         with gr.Column():
+            log = gr.File()
+            upload_log_button = gr.UploadButton("Click to upload a log file", file_types=[".txt", ".csv", ".pdf"], file_count="multiple")
+            upload_log_button.upload(upload_log_file, upload_log_button, log)
             with gr.Accordion("Advanced Options:", open=False):
                 with gr.Row():
                     with gr.Column():
         submit = gr.Button("Submit")
     with gr.Row():
         with gr.Box():
+            gr.Markdown("**Output**")
             output_7b = gr.Markdown()
     submit.click(
         process_stream,
+        inputs=[rules, log, temperature, top_p, top_k, max_new_tokens,seed],
         outputs=output_7b,
     )