Mr-Thop committed on
Commit
6596042
·
1 Parent(s): 9f21c0c

Update rm.py

Browse files
Files changed (1) hide show
  1. rm.py +115 -148
rm.py CHANGED
@@ -8,218 +8,185 @@ from google import genai
8
  import json
9
  import logging
10
 
11
# -------- App & clients --------
# API keys are read from the environment once, at import time.
_firecrawl_key = os.getenv("FIRECRAWL_API_KEY")
_gemini_key = os.getenv("GEMINI_API_KEY")

# Firecrawl client used by the scraping helpers below.
f_app = FirecrawlApp(api_key=_firecrawl_key)

# Flask application, wrapped with flask-cors' CORS(app).
app = Flask(__name__)
CORS(app)

# google-genai client from which chat sessions are created.
client = genai.Client(api_key=_gemini_key)
17
 
18
# Prompt sent as the first message of every chat session. The built-in
# default guarantees that send_message never receives None when the
# SYSTEM_PROMPT environment variable is unset.
_DEFAULT_SYSTEM_PROMPT = "You are a helpful research assistant. Respond using a JSON state machine with states PLAN, CALL, OBSERVATION, OUTPUT."
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", _DEFAULT_SYSTEM_PROMPT)

# Root logging configuration plus the logger used by this module.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
log = logging.getLogger("rm.py")
26
-
27
- # -------- Scholar search (location-aware) --------
28
_PROFILE_TEXT_FIELDS = ("name", "affiliations", "description", "position", "link", "email")


def _profile_mentions(profile, needle: str) -> bool:
    """Return True when any searched text field of *profile* contains *needle*."""
    if not isinstance(profile, dict):
        return False
    haystack = " | ".join(str(profile.get(field, "")) for field in _PROFILE_TEXT_FIELDS)
    return needle in haystack.lower()


def get_google_scholar_results(key_params: dict, location: str | None = None):
    """Query Google Scholar through SerpAPI and return profiles and organic results.

    Args:
        key_params: Search parameters for SerpAPI (for example ``{"q": "..."}``).
            The dict is copied, so the caller's object is left untouched.
        location: Optional text. When given, only author profiles whose name,
            affiliations, description, position, link or email contain it
            (case-insensitively) are kept.

    Returns:
        A ``(profiles, organic)`` tuple. Either element is ``None`` when the
        SerpAPI response does not contain the corresponding key.
    """
    # Work on a copy: writing the API key into the caller's dict would leak
    # the secret into whatever the caller later logs or returns.
    params = {
        **key_params,
        "api_key": os.getenv("SERPAPI_API_KEY"),
        "engine": "google_scholar",
        "hl": "en",
    }
    results = GoogleSearch(params).get_dict()

    profiles = results.get("profiles")
    organic = results.get("organic_results")

    if profiles and location:
        needle = location.strip().lower()
        if isinstance(profiles, dict):
            # NOTE(review): SerpAPI appears to return `profiles` as an object
            # holding an `authors` list rather than a bare list -- confirm
            # against the API reference. Both shapes are handled here.
            authors = profiles.get("authors") or []
            profiles = {
                **profiles,
                "authors": [a for a in authors if _profile_mentions(a, needle)],
            }
        else:
            profiles = [p for p in profiles if _profile_mentions(p, needle)]

    return profiles, organic
62
-
63
# Fields copied verbatim from each organic result, in output order.
_RESULT_FIELDS = ("title", "result_id", "link", "snippet", "publication_info", "resources")


def get_results(query):
    """Run a (optionally location-aware) Google Scholar search.

    Args:
        query: Either a plain query string, or a dict with the key ``"query"``
            (or ``"q"``) and an optional ``"location"``. ``None`` is treated
            as an empty query.

    Returns:
        A ``(profiles, answer, keys)`` tuple:
        - profiles: author profiles as returned by
          ``get_google_scholar_results`` (filtered by location when given);
        - answer: one simplified dict per organic result;
        - keys: list of the keys present in the first organic result, or an
          empty list when there are no organic results.
    """
    if isinstance(query, dict):
        q = query.get("query") or query.get("q") or ""
        location = query.get("location")
    else:
        # str(None) would search for the literal text "None".
        q = "" if query is None else str(query)
        location = None

    search_text = f"{q} {location}".strip() if location else q
    profiles, organic = get_google_scholar_results({"q": search_text}, location=location)
    organic = organic or []

    # Always a list, so callers never have to special-case a dict_keys view.
    keys = list(organic[0].keys()) if organic else []

    answer = []
    for item in organic:
        entry = {}
        for field in _RESULT_FIELDS:
            if field not in item:
                continue
            entry[field] = item[field]
            if field != "link":
                continue
            log.info("Result link: %s", item["link"])
            if "https://www.annualreviews" in item["link"]:
                try:
                    entry["abstract"] = get_abstract(item["link"])
                except Exception:
                    # The abstract is optional enrichment; a failed scrape
                    # must not discard the search results already collected.
                    log.exception("Could not fetch abstract for %s", item["link"])
        answer.append(entry)

    return profiles, answer, keys
111
 
112
- # -------- Scraping / LLM helpers --------
113
def get_abstract(url: str):
    """Scrape *url* with Firecrawl and return the first paragraph after "Abstract".

    Args:
        url: Address of the page to scrape.

    Returns:
        The HTML starting at the first ``<p>`` tag that follows the word
        "Abstract" (the opening tag is included, as before) and ending just
        before the next ``</p>``. If the paragraph is never closed, the rest
        of the document is returned. ``"Abstract not found"`` is returned
        when the page has no "Abstract" text or no ``<p>`` after it.
    """
    scrape_result = f_app.scrape_url(url, formats=["markdown", "html"])
    html = scrape_result.html or ""

    heading = html.find("Abstract")
    if heading == -1:
        return "Abstract not found"

    # Search with absolute offsets; a -1 from find() must never be used as a
    # slice bound, or an arbitrary chunk of the page would be returned.
    start = html.find("<p>", heading)
    if start == -1:
        return "Abstract not found"

    end = html.find("</p>", start)
    return html[start:] if end == -1 else html[start:end]
121
-
122
def scrape_web(url: str):
    """Fetch *url* through Firecrawl and return the page's HTML."""
    wanted_formats = ["markdown", "html"]
    page = f_app.scrape_url(url, formats=wanted_formats)
    return page.html
125
 
126
def get_response(chat_client, user):
    """Send *user* to the chat session and return the reply text.

    ``None`` is replaced by an empty string so the SDK is never handed
    ``None``. The text of the first part of the first candidate is returned.
    """
    message = "" if user is None else user
    reply = chat_client.send_message(message)
    first_candidate = reply.candidates[0]
    return first_candidate.content.parts[0].text
132
 
133
def convert_to_json(text):
    """Extract and parse the JSON object embedded in *text*.

    The span from the first ``{`` to the last ``}`` is parsed, so prose or
    Markdown fences around the object are ignored.

    Args:
        text: Raw model reply.

    Returns:
        The parsed value on success; otherwise a string that starts with
        ``"Json Parse Error due to "`` followed by the reason.
    """
    start = text.find("{")
    # rfind gives an absolute index. Counting from the reversed string made
    # the slice end at -0 (i.e. empty) whenever the text ended with "}".
    end = text.rfind("}")
    if start == -1 or end < start:
        return "Json Parse Error due to " + "no JSON object found in the text"
    try:
        return json.loads(text[start : end + 1])
    except (ValueError, RecursionError) as e:
        return "Json Parse Error due to " + str(e)
141
 
142
def get_observation(function, inp):
    """Run the tool named *function* on *inp* and wrap the result as an OBSERVATION.

    Args:
        function: Tool name requested by the model; ``"get_results"`` and
            ``"scrape_web"`` are supported.
        inp: Tool input. For ``get_results`` a query string or a dict with
            ``query``/``q`` and optional ``location``. For ``scrape_web`` a
            URL string, or a params dict from which the URL is taken.

    Returns:
        ``{"state": "OBSERVATION", "observation": {...}}``. For an unknown
        tool the observation carries a retry message instead of results.
    """
    if function == "get_results":
        # get_results itself accepts both a plain string and a dict.
        profiles, answer, keys = get_results(inp)
        observation = {
            "profiles": profiles,
            "answer": answer,
            "keys": list(keys) if keys else [],
        }
    elif function == "scrape_web":
        target = inp
        if isinstance(inp, dict):
            # The CALL handler may pass the whole params dict, but scrape_web
            # needs the URL itself: prefer a "url" key, else the first value.
            target = inp.get("url") or next(iter(inp.values()), None)
        observation = {"html_text": scrape_web(target)}
    else:
        observation = {"message": "Function Not found, Please Retry"}

    return {"state": "OBSERVATION", "observation": observation}
171
 
172
def get_output(chat_client, inp, max_steps: int = 25):
    """Drive the model's JSON state machine until it reaches the OUTPUT state.

    The model's replies are parsed with ``convert_to_json``. A ``CALL`` reply
    runs the requested tool through ``get_observation`` and sends the
    observation back; any other non-OUTPUT reply is echoed back to the model
    so it can continue.

    Args:
        chat_client: Chat session passed through to ``get_response``.
        inp: Initial message; it is sent as ``str(inp)``.
        max_steps: Upper bound on follow-up model turns, so a model that never
            emits OUTPUT cannot loop (and bill) forever.

    Returns:
        The parsed OUTPUT message, or the parse-error string from
        ``convert_to_json`` if a reply could not be parsed.

    Raises:
        RuntimeError: If OUTPUT is not reached within *max_steps* turns.
    """
    output = convert_to_json(get_response(chat_client, str(inp)))
    steps = 0
    while isinstance(output, dict) and output.get("state") != "OUTPUT":
        if steps >= max_steps:
            raise RuntimeError(f"Model did not reach the OUTPUT state within {max_steps} steps")
        steps += 1

        if output.get("state") == "CALL":
            params = output.get("params")
            # A non-empty dict (or a bare scalar such as a query string) is
            # handed to the tool as-is; empty or missing params become None.
            # Calling .keys() on a non-dict here used to raise.
            tool_input = params if params else None
            message = get_observation(output.get("function_name"), tool_input)
        else:
            # PLAN, OBSERVATION and unknown states are all echoed back.
            message = output

        output = convert_to_json(get_response(chat_client, str(message)))
    return output
 
 
 
196
 
197
def chat(query: str):
    """Run one research conversation for *query* and return the final answer.

    A new ``gemini-2.5-flash`` chat session is created, primed with
    ``SYSTEM_PROMPT``, and then driven by ``get_output`` starting from a
    ``START`` message that carries the user's query.

    Returns:
        The value stored under ``"output"`` in the model's final message.

    Raises:
        RuntimeError: If the conversation ends without a message that has an
            ``"output"`` field (for example when a reply could not be parsed).
    """
    chat_client = client.chats.create(model="gemini-2.5-flash")
    # Prime the session; the model's reply to the system prompt is not used.
    get_response(chat_client, SYSTEM_PROMPT)

    output = get_output(chat_client, {"state": "START", "user": query})
    if not isinstance(output, dict) or "output" not in output:
        # Indexing a parse-error string would raise an opaque TypeError.
        raise RuntimeError(f"Model did not produce a final OUTPUT message: {output!r}")
    return output["output"]
203
 
204
- # -------- Routes --------
205
@app.route("/", methods=["GET"])
def default():
    """Health-check endpoint: reports that the backend is reachable."""
    status = {"message": "Backend Working Successfully"}
    return jsonify(status)
208
 
209
@app.route("/chat", methods=["POST", "GET"])
def get_chat_results():
    """Answer a research query.

    The query is read from the ``"query"`` field of the JSON body on POST and
    from the ``query`` URL parameter on GET.

    Returns:
        ``{"output": ...}`` with the result of ``chat``, or
        ``{"error": "No query provided"}`` with status 400 when no query
        was supplied.
    """
    if request.method == "POST":
        data = request.get_json(silent=True)
        # A JSON body that is not an object (list, string, number) has no
        # "query" field; calling .get on it would raise.
        query = data.get("query") if isinstance(data, dict) else None
    else:  # GET
        query = request.args.get("query")

    if not query:
        return jsonify({"error": "No query provided"}), 400

    return jsonify({"output": chat(query)})
222
-
223
 
224
 
225
 
 
8
  import json
9
  import logging
10
 
11
+
12
+
13
+
14
# API keys are read from the environment once, at import time.
_firecrawl_key = os.getenv("FIRECRAWL_API_KEY")
_gemini_key = os.getenv("GEMINI_API_KEY")

# Firecrawl client used by get_abstract and scrape_web.
f_app = FirecrawlApp(api_key=_firecrawl_key)

# Flask application, wrapped with flask-cors' CORS(app).
app = Flask(__name__)
CORS(app)

# google-genai client from which chat() creates its sessions.
client = genai.Client(api_key=_gemini_key)
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')

# chat() sends SYSTEM_PROMPT as the first message of every session, so it must
# never be None. Fall back to a built-in prompt when the environment variable
# is unset or empty, and say so in the log because the fallback is generic.
_DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful research assistant. Respond using a JSON state machine "
    "with states PLAN, CALL, OBSERVATION, OUTPUT."
)
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT")
if not SYSTEM_PROMPT:
    logging.getLogger(__name__).warning(
        "SYSTEM_PROMPT is not set; using the built-in default prompt"
    )
    SYSTEM_PROMPT = _DEFAULT_SYSTEM_PROMPT
23
+
24
def get_google_scholar_results(key_params: dict):
    """Query Google Scholar through SerpAPI and return profiles and organic results.

    Args:
        key_params: Search parameters for SerpAPI (for example ``{"q": "..."}``).
            The dict is copied, so the caller's object is left untouched.

    Returns:
        A ``(profiles, organic_results)`` tuple. Either element is ``None``
        when the SerpAPI response does not contain the corresponding key.
    """
    # Work on a copy: writing the API key into the caller's dict would leak
    # the secret into whatever the caller later logs or returns.
    params = {
        **key_params,
        "api_key": os.getenv("SERPAPI_API_KEY"),
        "engine": "google_scholar",
        "hl": "en",
    }
    results = GoogleSearch(params).get_dict()
    # .get() yields None for a missing key, which covers all four
    # present/absent combinations in one expression.
    return results.get("profiles"), results.get("organic_results")
 
38
 
39
# Fields copied verbatim from each organic result, in output order.
_RESULT_FIELDS = ("title", "result_id", "link", "snippet", "publication_info", "resources")


def get_results(query: str):
    """Search Google Scholar for *query* and return simplified results.

    Args:
        query: Search text, sent to SerpAPI as the ``q`` parameter.

    Returns:
        A ``(profiles, answer, keys)`` tuple:
        - profiles: the ``profiles`` value returned by
          ``get_google_scholar_results`` (may be ``None``);
        - answer: one dict per organic result holding whichever of ``title``,
          ``result_id``, ``link``, ``snippet``, ``publication_info`` and
          ``resources`` the result has, plus ``abstract`` for links that
          contain ``https://www.annualreviews``;
        - keys: list of the keys present in the first organic result, or an
          empty list when there are no organic results.
    """
    profiles, organic = get_google_scholar_results({"q": query})
    organic = organic or []

    # A real list rather than a dict_keys view, so it prints and serialises
    # like the rest of the observation.
    keys = list(organic[0].keys()) if organic else []

    answer = []
    for item in organic:
        entry = {}
        for field in _RESULT_FIELDS:
            if field not in item:
                continue
            entry[field] = item[field]
            if field == "link" and "https://www.annualreviews" in item["link"]:
                try:
                    entry["abstract"] = get_abstract(item["link"])
                except Exception:
                    # The abstract is optional enrichment; a failed scrape
                    # must not discard the search results already collected.
                    logging.getLogger(__name__).exception(
                        "Could not fetch abstract for %s", item["link"]
                    )
        answer.append(entry)

    return profiles, answer, keys
79
 
 
80
def get_abstract(url: str):
    """Scrape *url* with Firecrawl and return the first paragraph after "Abstract".

    Args:
        url: Address of the page to scrape.

    Returns:
        The HTML starting at the first ``<p>`` tag that follows the word
        "Abstract" (the opening tag is included, as before) and ending just
        before the next ``</p>``. If the paragraph is never closed, the rest
        of the document is returned. ``"Abstract not found"`` is returned
        when the page has no "Abstract" text or no ``<p>`` after it.
    """
    scrape_result = f_app.scrape(url, formats=['markdown', 'html'])
    html = scrape_result.html or ""

    heading = html.find("Abstract")
    if heading == -1:
        return "Abstract not found"

    # Search with absolute offsets; a -1 from find() must never be used as a
    # slice bound, or an arbitrary chunk of the page would be returned.
    start = html.find("<p>", heading)
    if start == -1:
        return "Abstract not found"

    end = html.find("</p>", start)
    return html[start:] if end == -1 else html[start:end]
89
+
90
def scrape_web(url: str):
    """Scrape the web page at *url* with Firecrawl.

    Returns the HTML of the page.
    """
    wanted_formats = ['markdown', 'html']
    page = f_app.scrape(url, formats=wanted_formats)
    return page.html
97
 
98
def get_response(chat_client, user):
    """Send *user* to the chat session and return the reply text.

    The text of the first part of the first candidate is returned.
    """
    reply = chat_client.send_message(user)
    first_candidate = reply.candidates[0]
    return first_candidate.content.parts[0].text
101
 
102
def convert_to_json(text):
    """Extract and parse the JSON object embedded in *text*.

    The span from the first ``{`` to the last ``}`` is parsed, so prose or
    Markdown fences around the object are ignored.

    Args:
        text: Raw model reply.

    Returns:
        The parsed value on success; otherwise a string that starts with
        ``"Json Parse Error due to "`` followed by the reason.
    """
    start = text.find("{")
    # rfind gives an absolute index. Counting from the reversed string made
    # the slice end at -0 (i.e. empty) whenever the text ended with "}", and
    # at +1 when there was no "}" at all.
    end = text.rfind("}")
    if start == -1 or end < start:
        return "Json Parse Error due to " + "no JSON object found in the text"
    try:
        return json.loads(text[start : end + 1])
    except (ValueError, RecursionError) as e:
        return "Json Parse Error due to " + str(e)
110
 
111
def get_observation(function, inp):
    """Run the tool named *function* on *inp* and wrap the result as an OBSERVATION.

    Args:
        function: Tool name requested by the model; ``"get_results"`` and
            ``"scrape_web"`` are supported.
        inp: Tool input: the search query for ``get_results`` or the URL for
            ``scrape_web``.

    Returns:
        ``{"state": "OBSERVATION", "observation": {...}}``. For an unknown
        tool the observation carries a retry message instead of results.
    """
    if function == "get_results":
        profiles, answer, keys = get_results(inp)
        observation = {
            "profiles": profiles,
            "answer": answer,
            # get_results may hand back a dict_keys view, which would be
            # rendered as "dict_keys([...])" once the observation is
            # stringified for the model; a plain list reads cleanly.
            "keys": list(keys) if keys else [],
        }
    elif function == "scrape_web":
        observation = {"html_text": scrape_web(inp)}
    else:
        observation = {"message": "Function Not found, Please Retry"}

    return {"state": "OBSERVATION", "observation": observation}
139
 
140
def get_output(chat_client, inp, max_steps: int = 25):
    """Drive the model's JSON state machine until it reaches the OUTPUT state.

    The model's replies are parsed with ``convert_to_json``. A ``CALL`` reply
    runs the requested tool through ``get_observation`` and sends the
    observation back. Every other reply, including one that could not be
    parsed, is echoed back to the model so it can continue or correct itself.

    Args:
        chat_client: Chat session passed through to ``get_response``.
        inp: Initial message; it is sent as ``str(inp)``.
        max_steps: Upper bound on follow-up model turns, so a model that never
            emits OUTPUT cannot loop (and bill) forever.

    Returns:
        The parsed message whose ``"state"`` is ``"OUTPUT"``.

    Raises:
        RuntimeError: If OUTPUT is not reached within *max_steps* turns.
    """
    output = convert_to_json(get_response(chat_client, str(inp)))
    steps = 0
    # convert_to_json returns a plain string on a parse failure, so the state
    # is only read from real dicts; indexing the string used to raise.
    while not (isinstance(output, dict) and output.get("state") == "OUTPUT"):
        if steps >= max_steps:
            raise RuntimeError(f"Model did not reach the OUTPUT state within {max_steps} steps")
        steps += 1

        if isinstance(output, dict) and output.get("state") == "CALL":
            params = output.get("params")
            if isinstance(params, dict):
                # The tools take a single argument: use the last parameter
                # value, or None when the model supplied no parameters. A
                # separate local keeps the initial message from being passed
                # to the tool by accident.
                tool_input = list(params.values())[-1] if params else None
            else:
                tool_input = params
            message = get_observation(output.get("function_name"), tool_input)
        else:
            # PLAN, OBSERVATION, unknown states and parse errors are echoed.
            message = output

        output = convert_to_json(get_response(chat_client, str(message)))
    return output
161
+
162
+
163
+
164
 
165
def chat(query: str):
    """Run one research conversation for *query* and return the final answer.

    A new ``gemini-2.5-flash`` chat session is created, primed with
    ``SYSTEM_PROMPT``, and then driven by ``get_output`` starting from a
    ``START`` message that carries the user's query.

    Returns:
        The value stored under ``"output"`` in the model's final message.

    Raises:
        RuntimeError: If the conversation ends without a message that has an
            ``"output"`` field.
    """
    chat_client = client.chats.create(
        model="gemini-2.5-flash"
    )
    # Prime the session; the model's reply to the system prompt is not used.
    get_response(chat_client, SYSTEM_PROMPT)

    start_message = {
        "state": "START",
        "user": query,
    }
    output = get_output(chat_client, start_message)
    if not isinstance(output, dict) or "output" not in output:
        # Indexing a malformed result would raise an opaque KeyError/TypeError.
        raise RuntimeError(f"Model did not produce a final OUTPUT message: {output!r}")
    return output["output"]
177
 
178
+
179
@app.route("/", methods=["GET"])
def default():
    """Health-check endpoint: reports that the backend is reachable."""
    status = {"message": "Backend Working Successfully"}
    return jsonify(status)
182
 
183
@app.route("/chat", methods=["POST", "GET"])
def get_chat_results():
    """Answer a research query.

    The query is taken from the ``"query"`` field of a JSON body when one is
    present (any method), otherwise from the ``query`` URL parameter.

    Returns:
        ``{"output": ...}`` with the result of ``chat``, or
        ``{"error": "No query provided"}`` with status 400 when no query
        was supplied.
    """
    # silent=True returns None instead of raising when the request carries no
    # JSON body -- the normal case for a GET, which this route also accepts.
    data = request.get_json(silent=True)
    query = data.get("query") if isinstance(data, dict) else None
    if not query:
        query = request.args.get("query")

    if not query:
        return jsonify({"error": "No query provided"}), 400

    app.logger.info("Chat Initiated : %s", query)
    output = chat(query)
    app.logger.info("Output Parsed")
    return jsonify({"output": output})
190
 
191
 
192