srijaydeshpande committed on
Commit
0d8ba24
1 Parent(s): 50dfc74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -37
app.py CHANGED
@@ -66,45 +66,20 @@ def txt_to_html(text):
66
 
67
 
68
  def deidentify_doc(pdftext="", prompt="", maxtokens=600, temperature=1.2, top_probability=0.95):
69
-
70
- def replace_words_with_asterisk(big_string, words_to_replace):
71
- for word in words_to_replace:
72
- big_string = big_string.replace(word, '*')
73
- return big_string
74
-
75
- def get_output(pdfcontent):
76
- output = model.create_chat_completion(
77
- messages=[
78
- {"role": "assistant", "content": prompt},
79
- {
80
- "role": "user",
81
- "content": pdfcontent
82
- }
83
- ],
84
- max_tokens=maxtokens,
85
- temperature=temperature
86
- )
87
- wordstoremove = output['choices'][0]['message']['content']
88
- position = wordstoremove.find("STARTTOKEN,")
89
- if position != -1:
90
- wordstoremove = wordstoremove[position + len("STARTTOKEN,"):].strip()
91
- print('Words to remove----------------------------------------------')
92
- print(wordstoremove.split(','))
93
- output = replace_words_with_asterisk(pdfcontent, wordstoremove.split(','))
94
- return output
95
-
96
- iterations=1
97
- output = pdftext
98
- for iter in range(0,iterations):
99
- output = get_output(output)
100
- print('-------------------------------------------')
101
- print(output)
102
  return output
103
 
104
- def mkdir(dir):
105
- if not os.path.exists(dir):
106
- os.makedirs(dir)
107
-
108
  @spaces.GPU(duration=120)
109
  def pdf_to_text(files, output_folder, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
110
  output_folder = output_folder.replace('\\', '/')
 
66
 
67
 
68
  def deidentify_doc(pdftext="", prompt="", maxtokens=600, temperature=1.2, top_probability=0.95):
69
+ output = model.create_chat_completion(
70
+ messages=[
71
+ {"role": "assistant", "content": prompt},
72
+ {
73
+ "role": "user",
74
+ "content": pdftext
75
+ }
76
+ ],
77
+ max_tokens=maxtokens,
78
+ temperature=temperature
79
+ )
80
+ output = output['choices'][0]['message']['content']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  return output
82
 
 
 
 
 
83
  @spaces.GPU(duration=120)
84
  def pdf_to_text(files, output_folder, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
85
  output_folder = output_folder.replace('\\', '/')