#!/usr/bin/env python3
import argparse
import logging
import sys

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Set up logging
logging.basicConfig(level=logging.INFO)
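# Note: logging.debug calls (e.g. the received-prompt log in generate_text)
# are suppressed at INFO level; switch to logging.DEBUG to see them.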
model_repo = "filipealmeida/open-llama-3b-v2-pii-transform"
# Alternative model filenames (pass via --model-filename):
# model_filename = "ggml-model-f16.gguf"
# model_filename = "ggml-model-Q8_0.gguf"
def download_model():
    print(f"Downloading {args.model_filename} from {model_repo}...")
    sys.stdout.flush()
    file = hf_hub_download(
        repo_id=model_repo, filename=args.model_filename
    )
    print(f"Downloaded {file}")
    return file
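# The prompt template below ("### Instruction" / "### Response") matches the
# Alpaca-style format this fine-tune appears to expect; the stop sequences in
# generate_text cut generation off at end-of-sequence or the next section marker.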
def generate_text(prompt):
    logging.debug(f"Received prompt: {prompt}")
    full_prompt = f"""
### Instruction:
{prompt}
### Response:
"""
    logging.info(f"Input: {full_prompt}")
    output_stream = llm(full_prompt, max_tokens=-1, stop=["</s>", "###"],
                        stream=True, temperature=args.temp)
    full_text = ""
    for output_chunk in output_stream:
        text_chunk = output_chunk.get('choices', [{}])[0].get('text', "")
        full_text += text_chunk
        yield full_text  # Stream the accumulated text to the Gradio interface
    logging.info(f"Generated text: {full_text}")
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--model-filename", help="Model file to download from the Hugging Face repo", default="ggml-model-Q4_0.gguf")
parser.add_argument("--model-local", help="Path to a local model file (skips the download)")
parser.add_argument("--gpu", help="How many layers to offload to the GPU", default=0, type=int)
parser.add_argument("--ctx", help="Context window size in tokens", default=1024, type=int)
parser.add_argument("--temp", help="Sampling temperature", default=0.8, type=float)
args = parser.parse_args()
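# Example invocation (the script name "app.py" is illustrative):
#   python app.py --model-filename ggml-model-Q8_0.gguf --gpu 26 --ctx 2048 --temp 0.7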
# Prefer a local model file if one was given; otherwise download from the Hub.
if args.model_local:
    model_path = args.model_local
else:
    model_path = download_model()
print(f"Loading model from {model_path}")
# TODO n_gpu_layers doesn't seem to be working
llm = Llama(model_path=model_path, n_gpu_layers=args.gpu, n_ctx=args.ctx)
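# n_gpu_layers only takes effect when llama-cpp-python is built with GPU support
# (e.g. installed with CMAKE_ARGS="-DLLAMA_CUBLAS=on"); a CPU-only build silently
# ignores it, which may explain the TODO above.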
# Create a Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=4, placeholder="Enter text to anonymize...", label="Text with PII",
                   value="My name is Filipe and my phone number is 555-121-2234. How are you?\nWant to meet up in Los Angeles at 5pm by the Grove?\nI live in downtown LA.")
    ],
    outputs=gr.Textbox(label="PII Sanitized version of the text"),
    title="PII Sanitization Model",
    description="This application replaces personally identifiable information (PII) in the text you provide with synthetic PII. Input text containing PII in the textbox below, and the app will return a version with the original PII replaced by synthetic, non-sensitive information, preserving the utility of the text while protecting sensitive data. This is an EXPERIMENTAL application and should not be used in production. DO NOT TYPE real PII into the app: this is a public server intended for demonstration purposes only."
)
interface.queue()  # Queuing is required for streaming generator output in Gradio
interface.launch()