Yijun-Yang committed
Commit 575d321
1 Parent(s): 02e1dbc

localmodelfixed

config-bak.ini CHANGED
@@ -1,7 +1,7 @@
 [feature_store]
 reject_throttle = 0
-embedding_model_path = "/root/models/bce-embedding-base_v1"
-reranker_model_path = "/root/models/bce-reranker-base_v1"
+embedding_model_path = "bce-embedding-base_v1"
+reranker_model_path = "bce-reranker-base_v1"
 repo_dir = "repodir"
 work_dir = "workdir"
 n_clusters = [20, 50]
@@ -18,7 +18,7 @@ enable_remote = 1
 client_url = "http://127.0.0.1:8888/inference"
 
 [llm.server]
-local_llm_path = "/root/models/Qwen1.5-7B-Chat"
+local_llm_path = "Qwen1.5-7B-Chat"
 local_llm_max_text_length = 32000
 local_llm_bind_port = 8888
 remote_type = ""
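
The backup config now stores bare directory names instead of absolute /root/models paths. A minimal sketch of how such names resolve, assuming standard transformers from_pretrained behavior (an existing local directory wins, otherwise the string is treated as a Hub repo id; this is library behavior, not HuixiangDou-specific code):

# Rough sketch of how the new bare names in config-bak.ini resolve.
import os
from transformers import AutoTokenizer

model_path = "bce-embedding-base_v1"  # value taken from config-bak.ini

if os.path.isdir(model_path):
    # An existing directory relative to the working directory is
    # loaded as a local checkout.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
else:
    # Otherwise the string would be interpreted as a Hub repo id,
    # which needs a namespace; the hub-ready ids live in config.ini.
    raise FileNotFoundError(f"expected a local model directory: {model_path}")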
config.ini CHANGED
@@ -1,7 +1,7 @@
 [feature_store]
 reject_throttle = 0
-embedding_model_path = "/root/models/bce-embedding-base_v1"
-reranker_model_path = "/root/models/bce-reranker-base_v1"
+embedding_model_path = "maidalun1020/bce-embedding-base_v1"
+reranker_model_path = "maidalun1020/bce-reranker-base_v1"
 repo_dir = "repodir"
 work_dir = "workdir"
 n_clusters = [20, 50]
@@ -18,7 +18,7 @@ enable_remote = 1
 client_url = "http://127.0.0.1:8888/inference"
 
 [llm.server]
-local_llm_path = "/root/models/Qwen1.5-7B-Chat"
+local_llm_path = "Qwen/Qwen1.5-7B-Chat"
 local_llm_max_text_length = 32000
 local_llm_bind_port = 8888
 remote_type = ""
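
config.ini takes the other route: the new values are full Hub repo ids (namespace/name), which from_pretrained can download automatically. A minimal sketch of pre-fetching those repos with huggingface_hub so the first server start does not block on downloads (repo ids are the ones from this diff; the cache location is the library default):

# Minimal sketch: pre-fetch the repos named in config.ini.
from huggingface_hub import snapshot_download

for repo_id in ("maidalun1020/bce-embedding-base_v1",
                "maidalun1020/bce-reranker-base_v1",
                "Qwen/Qwen1.5-7B-Chat"):
    local_dir = snapshot_download(repo_id=repo_id)  # downloads into the HF cache
    print(repo_id, "->", local_dir)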
huixiangdou/service/llm_server_hybrid.py CHANGED
@@ -92,11 +92,11 @@ class InferenceWrapper:
         )
 
         self.tokenizer = AutoTokenizer.from_pretrained(model_path,
-                                                       trust_remote_code=True, local_files_only=True)
+                                                       trust_remote_code=True)
 
         if 'qwen1.5' in model_path.lower():
             self.model = AutoModelForCausalLM.from_pretrained(
-                model_path, device_map='auto', trust_remote_code=True, local_files_only=True).eval()
+                model_path, device_map='auto', trust_remote_code=True).eval()
         elif 'qwen' in model_path.lower():
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_path,
@@ -104,13 +104,13 @@ class InferenceWrapper:
                 trust_remote_code=True,
                 use_cache_quantization=True,
                 use_cache_kernel=True,
-                use_flash_attn=False, local_files_only=True).eval()
+                use_flash_attn=False).eval()
         else:
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_path,
                 trust_remote_code=True,
                 device_map='auto',
-                torch_dtype='auto', local_files_only=True).eval()
+                torch_dtype='auto').eval()
 
     def chat(self, prompt: str, history=[]):
         """Generate a response from local LLM.