thanhnt-cf committed on
Commit
8c5ab45
·
verified ·
1 Parent(s): 9d8cfe6

update known data

Browse files
Files changed (1) hide show
  1. llm_api/openai_api.py +41 -17
llm_api/openai_api.py CHANGED
@@ -1,33 +1,49 @@
1
  import json
 
2
  from dotenv import load_dotenv
3
- from .utils import get_data_format, get_image_data
4
- from openai import OpenAI, BadRequestError
 
 
 
 
 
 
5
  from .exceptions import RefusalError
6
- from .constants import EXTRACT_INFO_HUMAN_MESSAGE, EXTRACT_INFO_SYSTEM_MESSAGE,FOLLOW_SCHEMA_HUMAN_MESSAGE, FOLLOW_SCHEMA_SYSTEM_MESSAGE
7
 
8
  load_dotenv(override=True)
9
  client = OpenAI()
10
 
11
 
12
- def extract_info(img_paths, schema):
13
- print('Extracting info via OpenAI...')
14
  text_content = [
15
  {
16
  "type": "text",
17
  "text": EXTRACT_INFO_HUMAN_MESSAGE,
18
  },
19
  ]
 
 
 
 
 
 
 
 
 
20
  image_content = [
21
  {
22
  "type": "image_url",
23
  "image_url": {
24
- "url": f"data:image/{get_image_data(img_path)};base64,{get_image_data(img_path)}",
25
  },
26
  }
27
  for img_path in img_paths
28
  ]
29
  response = client.beta.chat.completions.parse(
30
- model="gpt-4o-2024-08-06",
31
  messages=[
32
  {
33
  "role": "system",
@@ -36,7 +52,7 @@ def extract_info(img_paths, schema):
36
  {
37
  "role": "user",
38
  "content": text_content + image_content,
39
- }
40
  ],
41
  max_tokens=1000,
42
  response_format=schema,
@@ -46,25 +62,34 @@ def extract_info(img_paths, schema):
46
  # top_p=.0000000000000000000001
47
  )
48
  if response.choices[0].message.refusal:
49
- raise RefusalError('OpenAI refused to respond to the request')
50
 
51
  content = response.choices[0].message.content
52
- parsed_data = json.loads(content)
53
  model_data = schema.model_validate(parsed_data)
54
 
55
  return 200, model_data
56
 
57
 
58
- def follow_structure(json_info, schema):
59
- print('Following structure via OpenAI...')
60
  text_content = [
61
  {
62
  "type": "text",
63
  "text": FOLLOW_SCHEMA_HUMAN_MESSAGE.format(json_info=json_info),
64
  },
65
  ]
 
 
 
 
 
 
 
 
 
66
  response = client.beta.chat.completions.parse(
67
- model="gpt-4o-2024-08-06",
68
  messages=[
69
  {
70
  "role": "system",
@@ -73,7 +98,7 @@ def follow_structure(json_info, schema):
73
  {
74
  "role": "user",
75
  "content": text_content,
76
- }
77
  ],
78
  max_tokens=1000,
79
  response_format=schema,
@@ -84,11 +109,10 @@ def follow_structure(json_info, schema):
84
  )
85
 
86
  if response.choices[0].message.refusal:
87
- raise RefusalError('OpenAI refused to respond to the request')
88
 
89
  content = response.choices[0].message.content
90
- parsed_data = json.loads(content)
91
  model_data = schema.model_validate(parsed_data)
92
 
93
  return 200, model_data
94
-
 
1
  import json
2
+
3
  from dotenv import load_dotenv
4
+ from openai import BadRequestError, OpenAI
5
+
6
+ from .constants import (
7
+ EXTRACT_INFO_HUMAN_MESSAGE,
8
+ EXTRACT_INFO_SYSTEM_MESSAGE,
9
+ FOLLOW_SCHEMA_HUMAN_MESSAGE,
10
+ FOLLOW_SCHEMA_SYSTEM_MESSAGE,
11
+ )
12
  from .exceptions import RefusalError
13
+ from .utils import get_data_format, get_image_data
14
 
15
  load_dotenv(override=True)
16
  client = OpenAI()
17
 
18
 
19
+ def extract_info(img_paths, schema, known_data=None):
20
+ print("Extracting info via OpenAI...")
21
  text_content = [
22
  {
23
  "type": "text",
24
  "text": EXTRACT_INFO_HUMAN_MESSAGE,
25
  },
26
  ]
27
+
28
+ if known_data is not None:
29
+ text_content.append(
30
+ {
31
+ "type": "text",
32
+ "text": f'\nAlso exploit the known data: \n\n"{known_data}"',
33
+ }
34
+ )
35
+
36
  image_content = [
37
  {
38
  "type": "image_url",
39
  "image_url": {
40
+ "url": f"data:image/{get_image_data(img_path)};base64,{get_image_data(img_path)}",
41
  },
42
  }
43
  for img_path in img_paths
44
  ]
45
  response = client.beta.chat.completions.parse(
46
+ model="gpt-4o-2024-08-06",
47
  messages=[
48
  {
49
  "role": "system",
 
52
  {
53
  "role": "user",
54
  "content": text_content + image_content,
55
+ },
56
  ],
57
  max_tokens=1000,
58
  response_format=schema,
 
62
  # top_p=.0000000000000000000001
63
  )
64
  if response.choices[0].message.refusal:
65
+ raise RefusalError("OpenAI refused to respond to the request")
66
 
67
  content = response.choices[0].message.content
68
+ parsed_data = json.loads(content)
69
  model_data = schema.model_validate(parsed_data)
70
 
71
  return 200, model_data
72
 
73
 
74
+ def follow_structure(json_info, schema, known_data=None):
75
+ print("Following structure via OpenAI...")
76
  text_content = [
77
  {
78
  "type": "text",
79
  "text": FOLLOW_SCHEMA_HUMAN_MESSAGE.format(json_info=json_info),
80
  },
81
  ]
82
+
83
+ if known_data is not None:
84
+ text_content.append(
85
+ {
86
+ "type": "text",
87
+ "text": f'\nAlso Exploit the known data: \n\n"{known_data}"',
88
+ }
89
+ )
90
+
91
  response = client.beta.chat.completions.parse(
92
+ model="gpt-4o-2024-08-06",
93
  messages=[
94
  {
95
  "role": "system",
 
98
  {
99
  "role": "user",
100
  "content": text_content,
101
+ },
102
  ],
103
  max_tokens=1000,
104
  response_format=schema,
 
109
  )
110
 
111
  if response.choices[0].message.refusal:
112
+ raise RefusalError("OpenAI refused to respond to the request")
113
 
114
  content = response.choices[0].message.content
115
+ parsed_data = json.loads(content)
116
  model_data = schema.model_validate(parsed_data)
117
 
118
  return 200, model_data