zezeze97 committed on
Commit
3cdc917
1 Parent(s): 286800c
Files changed (2) hide show
  1. README.md +22 -113
  2. sample/DFE-GPS.png +0 -0
README.md CHANGED
@@ -1,113 +1,25 @@
1
  ---
2
  license: apache-2.0
3
  ---
4
- # 基于FormalGeo7K的推理模型
 
5
 
6
- ## 快速开始
7
- 在运行脚本之前,首先安装如下必要的依赖。
8
 
9
- ```shell
10
- pip install torch transformers==4.40.0 accelerate pillow sentencepiece
11
- ```
12
-
13
- ```python
14
- import torch
15
- import transformers
16
- from transformers import AutoModelForCausalLM, AutoTokenizer
17
- from PIL import Image
18
- import warnings
19
- import numpy as np
20
-
21
-
22
- # set device
23
- device = 'cuda' # or cpu
24
- torch.set_default_device(device)
25
-
26
- # create model
27
- model = AutoModelForCausalLM.from_pretrained(
28
- 'NaughtyDog97/FormalEnhencedGPS-34B',
29
- torch_dtype=torch.float16, # float32 for cpu
30
- device_map='auto',
31
- trust_remote_code=True)
32
- tokenizer = AutoTokenizer.from_pretrained(
33
- 'NaughtyDog97/FormalEnhencedGPS-34B',
34
- use_fast=False,
35
- trust_remote_code=True,
36
- )
37
-
38
- # text prompt
39
- img_path = 'sample/4927.png'
40
- qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
41
- prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
42
- text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
43
-
44
- def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=None):
45
- prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('<image>')]
46
-
47
- def insert_separator(X, sep):
48
- return [ele for sublist in zip(X, [sep] * len(X)) for ele in sublist][:-1]
49
 
50
- input_ids = []
51
- offset = 0
52
- if len(prompt_chunks) > 0 and len(prompt_chunks[0]) > 0 and prompt_chunks[0][0] == tokenizer.bos_token_id:
53
- offset = 1
54
- input_ids.append(prompt_chunks[0][0])
55
 
56
- for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)):
57
- input_ids.extend(x[offset:])
58
-
59
- if return_tensors is not None:
60
- if return_tensors == 'pt':
61
- return torch.tensor(input_ids, dtype=torch.long)
62
- raise ValueError(f'Unsupported tensor type: {return_tensors}')
63
- return input_ids
64
-
65
- input_ids = tokenizer_image_token(text, tokenizer, -200, return_tensors='pt').unsqueeze(0).cuda()
66
-
67
- # image, sample images can be found in images folder
68
- image = Image.open(img_path).convert('RGB')
69
-
70
- image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)
71
-
72
- # generate
73
- with torch.inference_mode():
74
- output_ids = model.generate(
75
- input_ids,
76
- images=image_tensor,
77
- do_sample=False,
78
- temperature=None,
79
- top_p=None,
80
- top_k=None,
81
- num_beams=1,
82
- max_new_tokens=3500,
83
- eos_token_id=tokenizer.eos_token_id,
84
- repetition_penalty=None,
85
- use_cache=True
86
- )[0]
87
-
88
-
89
- respones = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
90
- print(respones)
91
 
 
 
92
  ```
93
 
94
- 我们的模型支持的求解方式有如下三种:
95
- ```python
96
- # Q => Predicted CDL + CoT Answer
97
- prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
98
-
99
-
100
- # Q + Predicted CDL => CoT Answer
101
- prompt = f'Using the provided geometric image, construction_cdl, image_cdl, and question, give a detailed step-by-step solution. Note that there may be minor errors in the construction_cdl and image_cdl.\nThe construction_cdl is:\n{predict_consCDL}\nThe image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
102
-
103
 
104
- # Q + Predicted CDL => Calibrated CDL + CoT Answer
105
- prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
106
-
107
-
108
- ```
109
-
110
- ## 结合Formalization模型的推理
111
  ```python
112
  import torch
113
  import transformers
@@ -139,7 +51,6 @@ def tokenizer_image_token(prompt, tokenizer, image_token_index, return_tensors=N
139
  return input_ids
140
 
141
  def parse_cdl(input_string):
142
- # 使用正则表达式查找各个部分
143
  patterns = {
144
  'construction_cdl': r'(?:The )?(?:calibrate )?construction_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
145
  'image_cdl': r'(?:The )?(?:calibrate )?image_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
@@ -148,8 +59,6 @@ def parse_cdl(input_string):
148
  }
149
 
150
  results = {}
151
-
152
- # 优先匹配包含"calibrate"的版本
153
  for key, pattern in patterns.items():
154
  pattern = pattern.replace("(?:calibrate )?", "(?:calibrate )")
155
  match = re.search(pattern, input_string, re.DOTALL)
@@ -171,26 +80,26 @@ torch.set_default_device(device)
171
 
172
  # create model
173
  formalization_model = AutoModelForCausalLM.from_pretrained(
174
- 'NaughtyDog97/GeoFormalizer',
175
  torch_dtype=torch.float16, # float32 for cpu
176
  device_map='auto',
177
  trust_remote_code=True)
178
 
179
  formalization_tokenizer = AutoTokenizer.from_pretrained(
180
- 'NaughtyDog97/GeoFormalizer',
181
  use_fast=True,
182
  padding_side="right",
183
  trust_remote_code=True)
184
 
185
 
186
  reason_model = AutoModelForCausalLM.from_pretrained(
187
- 'NaughtyDog97/FormalEnhencedGPS-34B',
188
  torch_dtype=torch.float16, # float32 for cpu
189
  device_map='auto',
190
  trust_remote_code=True)
191
  reason_tokenizer = AutoTokenizer.from_pretrained(
192
- 'NaughtyDog97/FormalEnhencedGPS-34B',
193
- use_fast=False,
194
  trust_remote_code=True)
195
 
196
 
@@ -259,13 +168,13 @@ with torch.inference_mode():
259
  respones = reason_tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
260
  print(f'Reasoning steps is\n{respones}')
261
 
262
-
263
-
264
  ```
265
 
266
 
267
 
268
- ## Performance
269
- | | Q => Predicted CDL + CoT Answer | Q + Predicted CDL => CoT Answer | Q + Predicted CDL => Calibrated CDL + CoT Answer |
270
- |-----|-------------------------------------|--------------------------------------|------------------------------------------------------|
271
- | siglip-0.4B-yi1.5-34B | 71.84/80.58 | 72.17/81.72 | 72.33/81.72 |
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+ # Diagram Formalization Enhanced Multi-Modal Geometry Problem Solver
5
+ ## Model Structure
6
 
7
+ <img src="sample/DFE-GPS.png" alt="DFE-GPS model structure" width="30%" height="auto">
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ - **Diagram Encoder**: [siglip-so400m-patch14-384](https://huggingface.co/google/siglip-so400m-patch14-384)
11
+ - **Lightweight LLM**: [Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct)
12
+ - **LLM**: [Yi-1.5-34B-Chat](https://huggingface.co/01-ai/Yi-1.5-34B-Chat)
 
 
13
 
14
+ ## Quick Start
15
+ Before running the script, install the following necessary dependencies.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ ```shell
18
+ pip install torch transformers==4.40.0 accelerate pillow sentencepiece
19
  ```
20
 
21
+ You can solve geometric problems using the following script. First, formalize the geometric images with the Diagram Formalizer, and then use the multi-modal reasoning model for problem-solving:
 
 
 
 
 
 
 
 
22
 
 
 
 
 
 
 
 
23
  ```python
24
  import torch
25
  import transformers
 
51
  return input_ids
52
 
53
  def parse_cdl(input_string):
 
54
  patterns = {
55
  'construction_cdl': r'(?:The )?(?:calibrate )?construction_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
56
  'image_cdl': r'(?:The )?(?:calibrate )?image_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
 
59
  }
60
 
61
  results = {}
 
 
62
  for key, pattern in patterns.items():
63
  pattern = pattern.replace("(?:calibrate )?", "(?:calibrate )")
64
  match = re.search(pattern, input_string, re.DOTALL)
 
80
 
81
  # create model
82
  formalization_model = AutoModelForCausalLM.from_pretrained(
83
+ 'NaughtyDog97/DiagramFormalizer',
84
  torch_dtype=torch.float16, # float32 for cpu
85
  device_map='auto',
86
  trust_remote_code=True)
87
 
88
  formalization_tokenizer = AutoTokenizer.from_pretrained(
89
+ 'NaughtyDog97/DiagramFormalizer',
90
  use_fast=True,
91
  padding_side="right",
92
  trust_remote_code=True)
93
 
94
 
95
  reason_model = AutoModelForCausalLM.from_pretrained(
96
+ 'NaughtyDog97/DFE-GPS-34B',
97
  torch_dtype=torch.float16, # float32 for cpu
98
  device_map='auto',
99
  trust_remote_code=True)
100
  reason_tokenizer = AutoTokenizer.from_pretrained(
101
+ 'NaughtyDog97/DFE-GPS-34B',
102
+ use_fast=False,
103
  trust_remote_code=True)
104
 
105
 
 
168
  respones = reason_tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
169
  print(f'Reasoning steps is\n{respones}')
170
 
 
 
171
  ```
172
 
173
 
174
 
175
+ ## Performance of DFE-GPS on the FormalGeo7K test set
176
+
177
+ | Model | Choice Acc | Open-Ended Acc | Process Evaluation Score |
178
+ |-------|------------|-------------|--------------------------|
179
+ | DFE-GPS-9B | 77.05 | 68.67 | 76.00 |
180
+ | DFE-GPS-34B | **82.38** | **75.33** | **79.07** |
sample/DFE-GPS.png ADDED