Sage committed on
Commit
d870c6e
1 Parent(s): 1f144c7
Files changed (3) hide show
  1. advance-river-381411-c7be39c33cff.json +0 -12
  2. app.py +18 -34
  3. settings.py +0 -1
advance-river-381411-c7be39c33cff.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "type": "service_account",
3
- "project_id": "advance-river-381411",
4
- "private_key_id": "c7be39c33cff4f7d73235768c604b45d34b70828",
5
- "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCwzbKttInEvLFp\nBIsVJC1XV+J5+ZfCRrjdAIeTYdtHLsvTn7Y6zaTB99rXTptw7lHvI/KpP5fIFCV5\n8m6W02bifcMya5ELF4DAQOh5z8rllmEcR9IxX4OtP60/7AvFOOf8AeypbnbbMjjw\nTpBa3H4kkHkp3AGJ8FsfDLU61iQ8O/GLRJj2eJ2XUihqEYQjzLj3rBphjkXqSPbW\nXbHwzNXpeA29QAfWcUnEstM7cWKgE9zdcAI3Nav688BDWP0F5N9r9YEZn5Iv2v3J\nfCzRHXx1TfW/T9+otlBtO5kC6RtDaw5KBEYwF3XfGFazy7wMGufYOrwmj0v59BQn\nP43AeDjBAgMBAAECggEABFSLZmX8NFN3SGZONj64NhIO19QxP3a+Nxp415cfWOFY\naGbaXpsUCJSmmVyhYb0SiGgB3NuT/vhgWRnjE8JafBxerCTWjPYexwan8vjQBDrA\nRuIq757iYZtEReMlACf2RZWx+03bbx+uJZOVBUaud7yKnSf4aS2X+70S0L23Ljry\nsDVD+AUOutKO4amJW2zVGoNy0c7FwnCYf3+glZ+aaFlfZciJ4ryPPS3cux6c/U8p\nRzZgrcLIxjn7UdokqbCYJ1qHpaLgFqt6fG1Fn6Mc1Q6Z10c4JU4sgHEwqSe3mDdc\nlsXsh15preljd3+I1d/tdCnORDPQYR0+jpSXcXgfuQKBgQDcylW6JlLmj+IH1LwV\nzU+3FWWJ3EIecVBFv4NvHQkQmoVOuaC56wflLYcAXZyP8k6O76pqSjofW+aevsiQ\ns2fm43XgFy4XUG+rYELJvPu4mj7swWD2SRukxnMO6qjqg2sAzktgWuKAdq+zoX6z\n/Hab1DIm+nJyfYFuRWf9pxlc+QKBgQDM/56jmNw14cXHwKsDhq8ArCQFmss8gtzp\nInmtWIykcPlx+dgEAyT52q3DCkYfYq/mJDZo1Zp5gDWIu9pdKFJzuhcfoyo2WFI8\nwWueOUZuuKPBTQe+htjsBthP/FHamLukmQGi8LVHpwmKgh9dSFuoNLdQc32/AJ/q\nZAqORnSUCQKBgQCHgyhauGrpWCZC6C8IsprapCdOFgH+7U13gbQJ3qhRqIVpbEVZ\n1wdhgi/56XrOXsoYsMDHvAcweBd0F2TCa1q6O6F9iLyhUp03cj2L1JIrG4DDj20T\nvIta9vJnlV9XkJF8TSG2YhHjBvWQKu65SZsCyZp+kfjsjFuEctUAui63AQKBgBKe\nJIo3F8jM+Glr1hw73yjweVUI+exE9ks2Flbn8937ZKw1RKkYoAMRGTbdeADhtZfx\nMf/TZnQicLo6VVqgjtxzyiXVa8ADxXQ/HMcB7KOhoT2tAUcMeCb3eC3LfKOdu2z3\neG6T7eLUCMnLh42xRKHCJ+PmmUT/iYaAD3VccLoBAoGBALULDC98XHe+J14HOh2u\na3hH4Mw7RPaiznvp/BAYIMdtiGH4eh6b9tXMuBPb3nC8qnHcF5HxqAMHHD+JsZj7\nhIlnvC9xHzAm7vqeqO5N5yT9EefkmvAm7g5AXP5k5/21YbRVbD0eSzB2vfV385us\nqKGloin1O/cM+JHXtcm1QhwS\n-----END PRIVATE KEY-----\n",
6
- "client_email": "test-983@advance-river-381411.iam.gserviceaccount.com",
7
- "client_id": "117808812221688451309",
8
- "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
- "token_uri": "https://oauth2.googleapis.com/token",
10
- "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
- "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-983%40advance-river-381411.iam.gserviceaccount.com"
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -9,14 +9,14 @@ from google.cloud import documentai_v1 as documentai
9
  import json
10
  from google.cloud import vision
11
  import time
12
- from settings import char_remove, gpt_api_key, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
13
  from tqdm import tqdm
14
  import logging
15
 
16
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
17
 
18
  def chat_gpt_image(content, context):
19
- openai.api_key = gpt_api_key
20
  prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
21
  document = "The following text was scanned using OCR, your goal is to return a corrected version of the text"
22
  prefix = "Additionally"
@@ -46,7 +46,7 @@ def remove_na(string):
46
  return string
47
 
48
  def chat_gpt_document(content, document_type, context):
49
- openai.api_key = gpt_api_key
50
  prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
51
  document_prefix = "The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text:"
52
  additional_prefix = "Additionally the text"
@@ -100,36 +100,12 @@ def chat_gpt_document(content, document_type, context):
100
  property_info[key] = value.strip()
101
  return json.dumps(property_info, indent=4)
102
 
103
- # def get_openai_api_usage():
104
- # openai.api_key = "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"
105
- # api_key = "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"
106
- # org_id = "org-lqZ72EJMjCjjXdRwPNfys6YO"
107
- # session = requests.Session()
108
- # headers = {
109
- # "Authorization": f"Bearer {api_key}",
110
- # "OpenAI-Organization": org_id
111
- # }
112
-
113
- # # Define the start and end dates for the usage data
114
- # today = datetime.date.today()
115
- # start_date = today - datetime.timedelta(days=30)
116
- # end_date = today
117
-
118
- # # Make the API call to retrieve the usage data
119
- # url = f"https://api.openai.com/v1/usage?date=2023-03-29"
120
- # response = session.get(url, headers=headers)
121
- # response.raise_for_status()
122
- # usage_data = response.json().get("data", [])
123
- # print(usage_data)
124
- # for item in usage_data:
125
- # print(f"Date: {item['aggregation_timestamp']}")
126
- # print(f"Requests: {item['n_requests']}")
127
- # print(f"Tokens: {item['n_context_tokens_total']}")
128
- # print(f"Model ID: {item['n_generated_tokens_total']}")
129
-
130
  def detect_image(content, lang):
131
-
132
- os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'advance-river-381411-c7be39c33cff.json'
 
 
 
133
  client = vision.ImageAnnotatorClient()
134
  buffer = BytesIO()
135
  content.save(buffer, format="PNG")
@@ -147,10 +123,16 @@ def detect_image(content, lang):
147
  '{}\nFor more info on error messages, check: '
148
  'https://cloud.google.com/apis/design/errors'.format(
149
  response.error.message))
 
 
150
  return(response.full_text_annotation.text)
151
 
152
  def detect_document(content):
153
- os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'advance-river-381411-c7be39c33cff.json'
 
 
 
 
154
  PROJECT_ID = project_id
155
  LOCATION = project_location # Format is 'us' or 'eu'
156
  PROCESSOR_ID = processor_id # Create processor in Cloud Console
@@ -188,6 +170,8 @@ def detect_document(content):
188
  name = name.split("/")[-1]
189
  name = name.split('.')[0]
190
 
 
 
191
  return(document_object.text, name)
192
 
193
  def image(content, lang, context):
@@ -230,7 +214,7 @@ def save_json(text, filename):
230
  json.dump(text, outfile)
231
  return filename
232
 
233
- with gr.Blocks(title="Ottico OCR", css=".markdown {text-align: center;}", theme='SebastianBravo/simci_css') as app:
234
  gr.Markdown("""# Ottico OCR
235
  Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
236
  with gr.Tab("Scan Image"):
 
9
  import json
10
  from google.cloud import vision
11
  import time
12
+ from settings import char_remove, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
13
  from tqdm import tqdm
14
  import logging
15
 
16
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
17
 
18
  def chat_gpt_image(content, context):
19
+ openai.api_key = os.environ['GPT_API_KEY']
20
  prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
21
  document = "The following text was scanned using OCR, your goal is to return a corrected version of the text"
22
  prefix = "Additionally"
 
46
  return string
47
 
48
  def chat_gpt_document(content, document_type, context):
49
+ openai.api_key = os.environ['GPT_API_KEY']
50
  prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
51
  document_prefix = "The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text:"
52
  additional_prefix = "Additionally the text"
 
100
  property_info[key] = value.strip()
101
  return json.dumps(property_info, indent=4)
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def detect_image(content, lang):
104
+ credentials = os.environ['CREDENTIALS']
105
+ temp_file_path = 'temp_credentials.json'
106
+ with open(temp_file_path, 'w') as file:
107
+ json.dump(credentials, file)
108
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'temp_credentials.json'
109
  client = vision.ImageAnnotatorClient()
110
  buffer = BytesIO()
111
  content.save(buffer, format="PNG")
 
123
  '{}\nFor more info on error messages, check: '
124
  'https://cloud.google.com/apis/design/errors'.format(
125
  response.error.message))
126
+
127
+ os.remove(temp_file_path)
128
  return(response.full_text_annotation.text)
129
 
130
  def detect_document(content):
131
+ credentials = os.environ['CREDENTIALS']
132
+ temp_file_path = 'temp_credentials.json'
133
+ with open(temp_file_path, 'w') as file:
134
+ json.dump(credentials, file)
135
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'temp_credentials.json'
136
  PROJECT_ID = project_id
137
  LOCATION = project_location # Format is 'us' or 'eu'
138
  PROCESSOR_ID = processor_id # Create processor in Cloud Console
 
170
  name = name.split("/")[-1]
171
  name = name.split('.')[0]
172
 
173
+ os.remove(temp_file_path)
174
+
175
  return(document_object.text, name)
176
 
177
  def image(content, lang, context):
 
214
  json.dump(text, outfile)
215
  return filename
216
 
217
+ with gr.Blocks(title="Ottico OCR", css=".markdown {text-align: center;}", theme='freddyaboulton/dracula_revamped') as app:
218
  gr.Markdown("""# Ottico OCR
219
  Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
220
  with gr.Tab("Scan Image"):
settings.py CHANGED
@@ -1,5 +1,4 @@
1
  #OpenAI Variables
2
- gpt_api_key = "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"
3
  gpt_model = "gpt-3.5-turbo"
4
 
5
  #Google Variables
 
1
  #OpenAI Variables
 
2
  gpt_model = "gpt-3.5-turbo"
3
 
4
  #Google Variables