DeepLearning101 committed on
Commit
5b3bc74
·
verified ·
1 Parent(s): 567e9e2

Update services.py

Browse files
Files changed (1) hide show
  1. services.py +90 -90
services.py CHANGED
@@ -19,125 +19,129 @@ class GeminiService:
19
 
20
  def _check_client(self):
21
  if not self.client:
22
- raise ValueError("API Key 未設定")
23
 
24
- def search_companies(self, query: str, exclude_names: List[str] = []) -> List[Dict]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  """
26
- Step 1: 領域探索 -> 公司列表
 
 
 
 
 
 
 
 
 
 
27
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  self._check_client()
29
  exclusion_prompt = ""
30
  if exclude_names:
31
  exclusion_prompt = f"IMPORTANT: Do not include: {', '.join(exclude_names)}."
32
 
33
- # Phase 1: Google Search (廣泛探索)
34
- # 這裡的 Prompt 強調:如果使用者輸入的是「領域(如: AI)」,請列出該領域的台灣代表性公司。
35
  search_prompt = f"""
36
- Using Google Search, find 5 to 10 prominent companies in Taiwan related to the query: "{query}".
37
-
38
- **Instructions:**
39
- 1. **Domain Search:** If "{query}" is an industry or technology (e.g., "AI", "Green Energy"), list the top representative Taiwanese companies in this field.
40
- 2. **Company Search:** If "{query}" is a specific name, list that company and its direct competitors.
41
- 3. **Target:** Focus on Taiwanese companies (or global companies with major R&D in Taiwan).
42
  {exclusion_prompt}
43
-
44
  List them (Full Name - Industry/Main Product) in Traditional Chinese.
45
  """
46
-
47
  search_response = self.client.models.generate_content(
48
- model=self.model_id,
49
- contents=search_prompt,
50
- config=types.GenerateContentConfig(
51
- tools=[types.Tool(google_search=types.GoogleSearch())]
52
- )
53
  )
54
- raw_text = search_response.text
55
 
56
- # Phase 2: Extract JSON (結構化)
57
  extract_prompt = f"""
58
- From the text below, extract company names and their industry/main product.
59
- Calculate a Relevance Score (0-100) based on query: "{query}".
60
-
61
- Return ONLY a JSON array: [{{"name": "...", "industry": "...", "relevanceScore": 85}}]
62
-
63
- Text:
64
- ---
65
- {raw_text}
66
- ---
67
  """
68
-
69
  extract_response = self.client.models.generate_content(
70
- model=self.model_id,
71
- contents=extract_prompt,
72
- config=types.GenerateContentConfig(
73
- response_mime_type='application/json'
74
- )
75
  )
76
-
77
- try:
78
- return json.loads(extract_response.text)
79
- except Exception as e:
80
- print(f"JSON Parse Error: {e}")
81
- return []
82
 
83
  def get_company_details(self, company: Dict) -> Dict:
84
- """
85
- Step 2: 進行商業徵信調查 (Deep Dive)
86
- """
87
  self._check_client()
88
  name = company.get('name')
89
-
90
  prompt = f"""
91
- Act as a professional "Business Analyst & Investigator".
92
- Conduct a comprehensive investigation on the Taiwanese company: "{name}".
93
-
94
- **Investigation Targets:**
95
-
96
- 1. **Overview (基本盤)**:
97
- - **Tax ID (統編)** & **Capital (資本額)**. (Try to find specific numbers)
98
- - **Representative (代表人)**.
99
- - **Core Business**: What specific problem do they solve? What is their "Ace" product?
100
-
101
- 2. **Workforce & Culture (內部情報)**:
102
- - **Employee Count**.
103
- - **Reviews/Gossip**: Search **PTT (Tech_Job, Soft_Job)**, **Dcard**, **Qollie**.
104
- - Summarize the *REAL* work vibe (e.g., "Good for juniors but low ceiling", "Free snacks but forced overtime").
105
-
106
- 3. **Legal & Risks (排雷專區)**:
107
- - Search: "{name} 勞資糾紛", "{name} 違反勞基法", "{name} 判決", "{name} 罰款".
108
- - List any red flags found in government records or news.
109
-
110
- **Format**:
111
- - Use Markdown.
112
- - Language: Traditional Chinese (繁體中文).
113
- - Be objective but don't sugarcoat potential risks.
114
  """
115
-
116
  response = self.client.models.generate_content(
117
- model=self.model_id,
118
- contents=prompt,
119
- config=types.GenerateContentConfig(
120
- tools=[types.Tool(google_search=types.GoogleSearch())]
121
- )
122
  )
123
-
124
- # Extract Sources
 
 
 
 
125
  sources = []
126
  if response.candidates[0].grounding_metadata and response.candidates[0].grounding_metadata.grounding_chunks:
127
  for chunk in response.candidates[0].grounding_metadata.grounding_chunks:
128
  if chunk.web and chunk.web.uri and chunk.web.title:
129
  sources.append({"title": chunk.web.title, "uri": chunk.web.uri})
130
-
131
  unique_sources = {v['uri']: v for v in sources}.values()
 
132
 
133
- return {
134
- "text": response.text,
135
- "sources": list(unique_sources)
136
- }
137
-
138
- def chat_with_ai(self, history: List[Dict], new_message: str, context: str) -> str:
139
  self._check_client()
140
- system_instruction = f"You are an expert Business Consultant. Answer based on this company report:\n{context}"
141
 
142
  chat_history = []
143
  for h in history:
@@ -145,12 +149,8 @@ class GeminiService:
145
  chat_history.append(types.Content(role=role, parts=[types.Part(text=h["content"])]))
146
 
147
  chat = self.client.chats.create(
148
- model=self.model_id,
149
- history=chat_history,
150
- config=types.GenerateContentConfig(
151
- system_instruction=system_instruction
152
- )
153
  )
154
-
155
  response = chat.send_message(new_message)
156
  return response.text
 
19
 
20
  def _check_client(self):
21
  if not self.client:
22
+ raise ValueError("API Key 未設定,請檢查 .env 或 Hugging Face Secrets")
23
 
24
+ # ==========================
25
+ # 🎓 教授搜尋相關功能
26
+ # ==========================
27
def search_professors(self, query: str, exclude_names: List[str] = []) -> List[Dict]:
    """Find professors in Taiwan for a research field, as structured data.

    Runs two model calls: a Google-Search-grounded call that produces a
    free-text list, then a JSON-mode call that extracts that text into a
    JSON array.

    Args:
        query: Research field to search for; interpolated into both prompts.
        exclude_names: Names the model is told not to include. The default
            list is only read, never mutated, so sharing it across calls is
            safe.

    Returns:
        The parsed JSON array. The prompt asks for objects with ``name``,
        ``university``, ``department`` and ``relevanceScore`` keys, but the
        individual items are not validated here. Returns ``[]`` when the
        search phase yields no text, or when the extraction output is
        missing, is not valid JSON, or is not a JSON array.

    Raises:
        ValueError: via ``_check_client`` when no client is configured.
    """
    self._check_client()
    exclusion_prompt = ""
    if exclude_names:
        exclusion_prompt = f"IMPORTANT: Do not include: {', '.join(exclude_names)}."

    # Phase 1: grounded search that returns free text.
    search_prompt = f"""
        Using Google Search, find 10 prominent professors in universities across Taiwan who are experts in the field of "{query}".
        CRITICAL: FACT CHECK they are current faculty. RELEVANCE must be high.
        {exclusion_prompt}
        List them (Name - University - Department) in Traditional Chinese.
        """
    search_response = self.client.models.generate_content(
        model=self.model_id,
        contents=search_prompt,
        config=types.GenerateContentConfig(
            tools=[types.Tool(google_search=types.GoogleSearch())]
        ),
    )
    search_text = search_response.text
    if not search_text:
        # Nothing to extract from; skip the second call instead of asking
        # the model to parse the literal string "None".
        return []

    # Phase 2: turn the free text into a JSON array.
    extract_prompt = f"""
        From the text below, extract professor names, universities, and departments.
        Calculate a Relevance Score (0-100) based on query: "{query}".
        Return ONLY a JSON array: [{{"name": "...", "university": "...", "department": "...", "relevanceScore": 85}}]
        Text: --- {search_text} ---
        """
    extract_response = self.client.models.generate_content(
        model=self.model_id,
        contents=extract_prompt,
        config=types.GenerateContentConfig(response_mime_type='application/json'),
    )

    try:
        parsed = json.loads(extract_response.text)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError covers a None
        # text. Other exceptions (e.g. KeyboardInterrupt) still propagate.
        return []
    # Honour the List[Dict] contract even if the model returns an object.
    return parsed if isinstance(parsed, list) else []
58
+
59
def get_professor_details(self, professor: Dict) -> Dict:
    """Research a single professor with a Google-Search-grounded model call.

    Args:
        professor: Mapping read with ``.get`` for the ``name``,
            ``university`` and ``department`` keys; a missing key is
            interpolated into the prompt as ``None``.

    Returns:
        Whatever ``_format_response_with_sources`` returns for the response.

    Raises:
        ValueError: via ``_check_client`` when no client is configured.
    """
    self._check_client()
    name = professor.get('name')
    uni = professor.get('university')
    dept = professor.get('department')

    prompt = f"""
        Act as an academic consultant. Investigate Professor {name} from {dept} at {uni}.
        Find "Combat Experience":
        1. **Key Publications (Last 5 Years)**: Find 2-3 top papers with Citation Counts.
        2. **Alumni Directions**: Where do their graduates work?
        3. **Industry Collaboration**: Any industry projects?
        Format output in Markdown (Traditional Chinese).
        """

    # Enable the Google Search tool so the response carries grounding data.
    search_tool = types.Tool(google_search=types.GoogleSearch())
    grounded_config = types.GenerateContentConfig(tools=[search_tool])
    response = self.client.models.generate_content(
        model=self.model_id,
        contents=prompt,
        config=grounded_config,
    )
    return self._format_response_with_sources(response)
75
+
76
+ # ==========================
77
+ # 🏢 公司搜尋相關功能
78
+ # ==========================
79
def search_companies(self, query: str, exclude_names: List[str] = []) -> List[Dict]:
    """Find companies in Taiwan related to a query, as structured data.

    Runs two model calls: a Google-Search-grounded call that produces a
    free-text list, then a JSON-mode call that extracts that text into a
    JSON array.

    Args:
        query: Industry or company name; interpolated into both prompts.
        exclude_names: Names the model is told not to include. The default
            list is only read, never mutated, so sharing it across calls is
            safe.

    Returns:
        The parsed JSON array. The prompt asks for objects with ``name``,
        ``industry`` and ``relevanceScore`` keys, but the individual items
        are not validated here. Returns ``[]`` when the search phase yields
        no text, or when the extraction output is missing, is not valid
        JSON, or is not a JSON array.

    Raises:
        ValueError: via ``_check_client`` when no client is configured.
    """
    self._check_client()
    exclusion_prompt = ""
    if exclude_names:
        exclusion_prompt = f"IMPORTANT: Do not include: {', '.join(exclude_names)}."

    # Phase 1: grounded search that returns free text.
    search_prompt = f"""
        Using Google Search, find 5 to 10 prominent companies in Taiwan related to: "{query}".
        Instructions:
        1. If "{query}" is an industry (e.g. AI), list representative Taiwanese companies.
        2. If "{query}" is a name, list the company and competitors.
        {exclusion_prompt}
        List them (Full Name - Industry/Main Product) in Traditional Chinese.
        """
    search_response = self.client.models.generate_content(
        model=self.model_id,
        contents=search_prompt,
        config=types.GenerateContentConfig(
            tools=[types.Tool(google_search=types.GoogleSearch())]
        ),
    )
    search_text = search_response.text
    if not search_text:
        # Nothing to extract from; skip the second call instead of asking
        # the model to parse the literal string "None".
        return []

    # Phase 2: turn the free text into a JSON array.
    extract_prompt = f"""
        From text, extract company names and industry.
        Calculate Relevance Score (0-100) for query: "{query}".
        Return ONLY JSON array: [{{"name": "...", "industry": "...", "relevanceScore": 85}}]
        Text: --- {search_text} ---
        """
    extract_response = self.client.models.generate_content(
        model=self.model_id,
        contents=extract_prompt,
        config=types.GenerateContentConfig(response_mime_type='application/json'),
    )

    try:
        parsed = json.loads(extract_response.text)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError covers a None
        # text. Other exceptions (e.g. KeyboardInterrupt) still propagate.
        return []
    # Honour the List[Dict] contract even if the model returns an object.
    return parsed if isinstance(parsed, list) else []
 
 
 
 
112
 
113
  def get_company_details(self, company: Dict) -> Dict:
 
 
 
114
  self._check_client()
115
  name = company.get('name')
 
116
  prompt = f"""
117
+ Act as a "Business Analyst". Investigate Taiwanese company: "{name}".
118
+ Targets:
119
+ 1. **Overview**: Tax ID (統編), Capital (資本額), Representative.
120
+ 2. **Workforce & Culture**: Employee count, Reviews from PTT(Tech_Job)/Dcard/Qollie (Pros & Cons).
121
+ 3. **Legal & Risks**: Search for "{name} 勞資糾紛", "{name} 判決", "{name} 違反勞基法".
122
+ Format in Markdown (Traditional Chinese). Be objective.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  """
 
124
  response = self.client.models.generate_content(
125
+ model=self.model_id, contents=prompt,
126
+ config=types.GenerateContentConfig(tools=[types.Tool(google_search=types.GoogleSearch())])
 
 
 
127
  )
128
+ return self._format_response_with_sources(response)
129
+
130
+ # ==========================
131
+ # 共用功能
132
+ # ==========================
133
+ def _format_response_with_sources(self, response):
134
  sources = []
135
  if response.candidates[0].grounding_metadata and response.candidates[0].grounding_metadata.grounding_chunks:
136
  for chunk in response.candidates[0].grounding_metadata.grounding_chunks:
137
  if chunk.web and chunk.web.uri and chunk.web.title:
138
  sources.append({"title": chunk.web.title, "uri": chunk.web.uri})
 
139
  unique_sources = {v['uri']: v for v in sources}.values()
140
+ return {"text": response.text, "sources": list(unique_sources)}
141
 
142
+ def chat_with_ai(self, history: List[Dict], new_message: str, context: str, role_instruction: str = "Source of truth") -> str:
 
 
 
 
 
143
  self._check_client()
144
+ system_instruction = f"{role_instruction}:\n{context}"
145
 
146
  chat_history = []
147
  for h in history:
 
149
  chat_history.append(types.Content(role=role, parts=[types.Part(text=h["content"])]))
150
 
151
  chat = self.client.chats.create(
152
+ model=self.model_id, history=chat_history,
153
+ config=types.GenerateContentConfig(system_instruction=system_instruction)
 
 
 
154
  )
 
155
  response = chat.send_message(new_message)
156
  return response.text