jacobfrye commited on
Commit
8e1cd4f
1 Parent(s): 40ffe6d
Files changed (1) hide show
  1. app.py +122 -58
app.py CHANGED
@@ -1,55 +1,146 @@
 
1
  import gradio as gr
2
- from llm_rs import AutoModel, SessionConfig, GenerationConfig, Precision, KnownModels
 
 
 
 
3
 
4
- # https://huggingface.co/TheBloke/open-llama-13b-open-instruct-GGML/blob/main/open-llama-13b-open-instruct.ggmlv3.q8_0.bin
5
- repo_name = "TheBloke/open-llama-13b-open-instruct-GGML"
6
- file_name = "open-llama-13b-open-instruct.ggmlv3.q8_0.bin"
7
 
8
- examples = [
9
- "Write a travel blog about a 3-day trip to Thailand.",
10
- "Tell me a short story about a robot that has a nice day.",
11
- "Compose a tweet to congratulate rustformers on the launch of their HuggingFace Space.",
12
- "Explain how a candle works to a 6-year-old in a few sentences.",
13
- "What are some of the most common misconceptions about birds?",
14
- "Explain why the Rust programming language is so popular.",
15
- ]
16
 
17
  session_config = SessionConfig(threads=2,batch_size=2)
18
- model = AutoModel.from_pretrained(repo_name, model_file=file_name, model_type=KnownModels.Llama, session_config=session_config,verbose=True)
19
-
20
- def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  prompt=f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
23
  ### Instruction:
24
- {instruction}
 
 
 
 
 
 
 
 
25
  ### Response:
26
  Answer:"""
27
- generation_config = GenerationConfig(seed=seed,temperature=temperature,top_p=top_p,top_k=top_k,max_new_tokens=max_new_tokens)
28
  response = ""
29
- streamer = model.stream(prompt=prompt,generation_config=generation_config)
30
- for new_text in streamer:
31
- response += new_text
 
32
  yield response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
 
 
 
 
 
 
 
34
 
35
  with gr.Blocks(
36
  theme=gr.themes.Soft(),
37
  css=".disclaimer {font-variant-caps: all-small-caps;}",
38
  ) as demo:
39
  gr.Markdown(
40
- """<h1><center>MPT-7B-Instruct on CPU in Rust 🦀</center></h1>
41
-
42
- This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) to execute [MPT-7B-Instruct](https://huggingface.co/mosaicml/mpt-7b-instruct) on 2 CPU cores.
43
  """
44
  )
 
 
 
 
 
45
  with gr.Row():
46
  with gr.Column():
47
- with gr.Row():
48
- instruction = gr.Textbox(
49
- placeholder="Enter your question or instruction here",
50
- label="Question/Instruction",
51
- elem_id="q-input",
52
- )
53
  with gr.Accordion("Advanced Options:", open=False):
54
  with gr.Row():
55
  with gr.Column():
@@ -113,39 +204,12 @@ with gr.Blocks(
113
  submit = gr.Button("Submit")
114
  with gr.Row():
115
  with gr.Box():
116
- gr.Markdown("**MPT-7B-Instruct**")
117
  output_7b = gr.Markdown()
118
 
119
- with gr.Row():
120
- gr.Examples(
121
- examples=examples,
122
- inputs=[instruction],
123
- cache_examples=False,
124
- fn=process_stream,
125
- outputs=output_7b,
126
- )
127
- with gr.Row():
128
- gr.Markdown(
129
- "Disclaimer: MPT-7B can produce factually incorrect output, and should not be relied on to produce "
130
- "factually accurate information. MPT-7B was trained on various public datasets; while great efforts "
131
- "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
132
- "biased, or otherwise offensive outputs.",
133
- elem_classes=["disclaimer"],
134
- )
135
- with gr.Row():
136
- gr.Markdown(
137
- "[Privacy policy](https://gist.github.com/samhavens/c29c68cdcd420a9aa0202d0839876dac)",
138
- elem_classes=["disclaimer"],
139
- )
140
-
141
  submit.click(
142
  process_stream,
143
- inputs=[instruction, temperature, top_p, top_k, max_new_tokens,seed],
144
- outputs=output_7b,
145
- )
146
- instruction.submit(
147
- process_stream,
148
- inputs=[instruction, temperature, top_p, top_k, max_new_tokens,seed],
149
  outputs=output_7b,
150
  )
151
 
 
1
+ import sqlite3
2
  import gradio as gr
3
+ from hashlib import md5 as hash_algo
4
+ from re import match
5
+ from io import BytesIO
6
+ from pypdf import PdfReader
7
+ from llm_rs import AutoModel,SessionConfig,GenerationConfig,Precision
8
 
9
+ repo_name = "rustformers/mpt-7b-ggml"
10
+ file_name = "mpt-7b-instruct-q5_1-ggjt.bin"
11
+ script_env = 'prod'
12
 
 
 
 
 
 
 
 
 
13
 
14
  session_config = SessionConfig(threads=2,batch_size=2)
15
+ model = AutoModel.from_pretrained(repo_name, model_file=file_name, session_config=session_config,verbose=True)
 
 
16
 
17
+ def process_stream(rules, log, temperature, top_p, top_k, max_new_tokens, seed):
18
+ con = sqlite3.connect("history.db")
19
+ cur = con.cursor()
20
+ instruction = ''
21
+ hashes = []
22
+
23
+ if type(rules) is not list:
24
+ rules = [rules]
25
+
26
+ for rule in rules:
27
+ data, hash = get_file_contents(rule)
28
+ instruction += data + '\n'
29
+ hashes.append(hash)
30
+
31
+ hashes.sort()
32
+ hashes = hash_algo(''.join(hashes).encode()).hexdigest()
33
+
34
+ largest = 0
35
+ lines = instruction.split('\r\n')
36
+
37
+ if len(lines) == 1:
38
+ lines = instruction.split('\n')
39
+
40
+ for line in lines:
41
+ m = match('^(\d+)\.', line)
42
+ if m != None:
43
+ num = int(line[m.start():m.end()-1])
44
+
45
+ if num > largest:
46
+ largest = num
47
+
48
+ instruction += str(largest + 1) + '. '
49
+
50
+ query, hash = get_file_contents(log)
51
+ hashes = hash_algo((hashes + hash).encode()).hexdigest()
52
+
53
+ instruction = instruction.replace('\r\r\n', '\n')
54
+
55
  prompt=f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
56
  ### Instruction:
57
+ A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.
58
+
59
+ Q: Read the rules stated below and check the queries for any violation. State the rules which are violated by a query (if any). Also suggest a possible remediation, if possible. Do not make any assumptions outside of the rules stated below.
60
+
61
+ {instruction}The queries are as follows:
62
+ {query}
63
+
64
+ A:
65
+
66
  ### Response:
67
  Answer:"""
68
+
69
  response = ""
70
+ row = cur.execute('SELECT response FROM queries WHERE hexdigest = ?', [hashes]).fetchone()
71
+
72
+ if row != None:
73
+ response += "Cached Result:\n" + row[0]
74
  yield response
75
+ else:
76
+ if script_env != 'test':
77
+ generation_config = GenerationConfig(seed=seed,temperature=temperature,top_p=top_p,top_k=top_k,max_new_tokens=max_new_tokens)
78
+ streamer = model.stream(prompt=prompt,generation_config=generation_config)
79
+ for new_text in streamer:
80
+ response += new_text
81
+ yield response
82
+ else:
83
+ num = 0
84
+ while num < 100:
85
+ response += " " + str(num)
86
+ num += 1
87
+ yield response
88
+
89
+ cur.execute('INSERT INTO queries VALUES(?, ?)', (hashes, response))
90
+ con.commit()
91
+
92
+ cur.close()
93
+ con.close()
94
+
95
+ def get_file_contents(file):
96
+ data = None
97
+ byte_hash = ''
98
+
99
+ with open(file.name, 'rb') as f:
100
+ data = f.read()
101
+ byte_hash = hash_algo(data).hexdigest()
102
+
103
+ if file.name.endswith('.pdf'):
104
+ rdr = PdfReader(BytesIO(data))
105
+ data = ''
106
+
107
+ for page in rdr.pages:
108
+ data += page.extract_text()
109
+ else:
110
+ data = data.decode()
111
+
112
+ if file.name.endswith(".csv"):
113
+ data = data.replace(',', ' ')
114
+
115
+ return (data, byte_hash)
116
 
117
+ def upload_log_file(files):
118
+ file_paths = [file.name for file in files]
119
+ return file_paths
120
+
121
+ def upload_file(files):
122
+ file_paths = [file.name for file in files]
123
+ return file_paths
124
 
125
  with gr.Blocks(
126
  theme=gr.themes.Soft(),
127
  css=".disclaimer {font-variant-caps: all-small-caps;}",
128
  ) as demo:
129
  gr.Markdown(
130
+ """<h1><center>Grid 5.0 Information Security Track</center></h1>
 
 
131
  """
132
  )
133
+
134
+ rules = gr.File(file_count="multiple")
135
+ upload_button = gr.UploadButton("Click to upload a new Compliance Document", file_types=[".txt", ".pdf"], file_count="multiple")
136
+ upload_button.upload(upload_file, upload_button, rules)
137
+
138
  with gr.Row():
139
  with gr.Column():
140
+ log = gr.File()
141
+ upload_log_button = gr.UploadButton("Click to upload a log file", file_types=[".txt", ".csv", ".pdf"], file_count="multiple")
142
+ upload_log_button.upload(upload_log_file, upload_log_button, log)
143
+
 
 
144
  with gr.Accordion("Advanced Options:", open=False):
145
  with gr.Row():
146
  with gr.Column():
 
204
  submit = gr.Button("Submit")
205
  with gr.Row():
206
  with gr.Box():
207
+ gr.Markdown("**Output**")
208
  output_7b = gr.Markdown()
209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  submit.click(
211
  process_stream,
212
+ inputs=[rules, log, temperature, top_p, top_k, max_new_tokens,seed],
 
 
 
 
 
213
  outputs=output_7b,
214
  )
215