|
from collections import namedtuple |
|
from typing import Optional |
|
|
|
import openai |
|
import logging |
|
|
|
logger = logging.getLogger("presidio-streamlit")

# Immutable bundle of OpenAI connection settings. Field order matters for
# callers that construct the tuple positionally, so it must not change.
OpenAIParams = namedtuple(
    "open_ai_params",
    (
        "openai_key",
        "model",
        "api_base",
        "deployment_name",
        "api_version",
        "api_type",
    ),
)
|
|
|
|
|
def set_openai_params(openai_params: OpenAIParams):
    """Copy connection settings onto the global ``openai`` module.

    The API key and API version are always assigned. The API base and API
    type are assigned only when ``openai_params.api_base`` is truthy; per the
    original note, those settings only relate to Azure OpenAI deployments.

    :param openai_params: OpenAIParams tuple. This function reads its
        ``openai_key``, ``api_version``, ``api_base`` and ``api_type`` fields.
    """
    openai.api_key = openai_params.openai_key
    openai.api_version = openai_params.api_version

    base_url = openai_params.api_base
    if not base_url:
        # No custom endpoint given: leave the module's base/type untouched.
        return

    openai.api_base = base_url
    openai.api_type = openai_params.api_type
|
|
|
|
|
def call_completion_model(
    prompt: str,
    model: str = "text-davinci-003",
    max_tokens: int = 512,
    deployment_id: Optional[str] = None,
) -> str:
    """Send a prompt to the OpenAI Completion service and return the generated text.

    :param prompt: The prompt for the completion model
    :param model: OpenAI model name
    :param max_tokens: Model's max_tokens parameter
    :param deployment_id: Azure OpenAI deployment ID; forwarded to the
        service only when it is truthy
    :return: The ``text`` attribute of the first entry in the response's
        ``choices``
    """
    # Start with deployment_id (when given) so the keyword order matches the
    # two explicit calls this replaces.
    request_kwargs = {"deployment_id": deployment_id} if deployment_id else {}
    request_kwargs.update(model=model, prompt=prompt, max_tokens=max_tokens)

    completion = openai.Completion.create(**request_kwargs)

    first_choice = completion["choices"][0]
    return first_choice.text
|
|
|
|
|
def create_prompt(anonymized_text: str) -> str:
    """
    Build the few-shot prompt asking the model to fill placeholders with fake values.

    The prompt consists of a role description, a list of instructions, three
    worked input/output examples, and finally the caller's text wrapped in
    ``[[TEXT STARTS]]`` / ``[[TEXT ENDS]]`` markers followed by an open
    ``output:`` line for the model to complete.

    :param anonymized_text: Text with placeholders instead of PII values, e.g. My name is <PERSON>.
    :return: The complete prompt string, ending with ``output:``
    """

    # Doubled braces ({{...}}) are f-string escapes: they render as literal
    # single braces in the prompt. Only {anonymized_text} is interpolated.
    return f"""
Your role is to create synthetic text based on de-identified text with placeholders instead of Personally Identifiable Information (PII).
Replace the placeholders (e.g. ,<PERSON>, {{DATE}}, {{ip_address}}) with fake values.

Instructions:

a. Use completely random numbers, so every digit is drawn between 0 and 9.
b. Use realistic names that come from diverse genders, ethnicities and countries.
c. If there are no placeholders, return the text as is.
d. Keep the formatting as close to the original as possible.
e. If PII exists in the input, replace it with fake values in the output.
f. Remove whitespace before and after the generated text

input: [[TEXT STARTS]] How do I change the limit on my credit card {{credit_card_number}}?[[TEXT ENDS]]
output: How do I change the limit on my credit card 2539 3519 2345 1555?
input: [[TEXT STARTS]]<PERSON> was the chief science officer at <ORGANIZATION>.[[TEXT ENDS]]
output: Katherine Buckjov was the chief science officer at NASA.
input: [[TEXT STARTS]]Cameroon lives in <LOCATION>.[[TEXT ENDS]]
output: Vladimir lives in Moscow.

input: [[TEXT STARTS]]{anonymized_text}[[TEXT ENDS]]
output:"""
|
|