HenryStephen committed on
Commit
5e997c4
1 Parent(s): ae1d9e4

Update topic rule

Browse files
app.py CHANGED
@@ -373,7 +373,8 @@ if __name__ == "__main__":
373
 
374
  # 2) Updating topic cluster number
375
  topics_text = ' '.join(
376
- [lemmatizer.lemmatize(topic.lower().replace('-', ' ')) for topic in query_doc.topics])
 
377
  topic_embeddings = generate_scibert_embedding(tokenizer, scibert_model, topics_text)
378
  topic_cluster_number = int(topic_kmeans.predict(topic_embeddings)[0])
379
 
 
373
 
374
  # 2) Updating topic cluster number
375
  topics_text = ' '.join(
376
+ [lemmatizer.lemmatize(topic.lower().replace('-', ' ')) for topic in query_doc.topics if
377
+ topic.lower() not in ["python", "python3"]])
378
  topic_embeddings = generate_scibert_embedding(tokenizer, scibert_model, topics_text)
379
  topic_cluster_number = int(topic_kmeans.predict(topic_embeddings)[0])
380
 
data/index_developing.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff4a1676f2c4b0292acfe69a209d9e429b3a2ca7ae4392f15893886414d5578
3
  size 226391428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0219103fcd17858907afde8f302a69ee470d0d7d9f5e9651b87c6e2d47d1c8
3
  size 226391428
data/kmeans_model_code_unixcoder.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45a9cc8a2e348d81a32ca5fda4e9690b1915d11347f1a343bc4124ae000b38de
3
  size 500271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25d3f1751540c128eb90ae8bac569f835cb9b0c84c9c450a4d487fa90bd3764
3
  size 500271
data/kmeans_model_topic_scibert.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36a1852f52907a12cb18fc90fe8b36d3049e123172d9c061c80b4b679685173e
3
  size 500271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aba8f5771d2c11ad5f9ac00b1ed5e6b6254f8d89905e6e892e52b9834cf12f7
3
  size 500271
data/repo_topic_clusters.json CHANGED
The diff for this file is too large to render. See raw diff
 
data/repo_topic_clusters_developing.json CHANGED
The diff for this file is too large to render. See raw diff