sugiv committed on
Commit
99c0ca6
1 Parent(s): 506d5f3

Adding a simple monkey search for Leetcode - Darn LeetMonkey

Browse files
Files changed (2) hide show
  1. app.py +37 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -6,6 +6,28 @@ from sentence_transformers import SentenceTransformer
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import os
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Initialize Pinecone
10
  PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
11
  pc = Pinecone(api_key=PINECONE_API_KEY)
@@ -17,10 +39,21 @@ device = 'cpu'
17
  splade = SpladeEncoder(device=device)
18
  dense_model = SentenceTransformer('sentence-transformers/all-Mpnet-base-v2', device=device)
19
 
20
- # Load the quantized Llama 2 model and tokenizer
21
- model_name = "distilgpt2" # Using distilgpt2 for CPU efficiency
22
- tokenizer = AutoTokenizer.from_pretrained(model_name)
23
- model = AutoModelForCausalLM.from_pretrained(model_name, low_cpu_mem_usage=True)
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def search_problems(query, top_k=5):
26
  dense_query = dense_model.encode([query])[0].tolist()
 
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import os
8
 
9
+ import requests
10
+ import os
11
+ from tqdm import tqdm
12
+
13
+
14
+ def download_model(url, model_path):
15
+ response = requests.get(url, stream=True)
16
+ total_size = int(response.headers.get('content-length', 0))
17
+ block_size = 1024 # 1 KB
18
+
19
+ with open(model_path, 'wb') as file, tqdm(
20
+ desc=model_path,
21
+ total=total_size,
22
+ unit='iB',
23
+ unit_scale=True,
24
+ unit_divisor=1024,
25
+ ) as progress_bar:
26
+ for data in response.iter_content(block_size):
27
+ size = file.write(data)
28
+ progress_bar.update(size)
29
+
30
+
31
  # Initialize Pinecone
32
  PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
33
  pc = Pinecone(api_key=PINECONE_API_KEY)
 
39
  splade = SpladeEncoder(device=device)
40
  dense_model = SentenceTransformer('sentence-transformers/all-Mpnet-base-v2', device=device)
41
 
42
+ from llama_cpp import Llama
43
+
44
+ # Define the model URL and path
45
+ model_url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
46
+ model_path = "/tmp/llama-2-7b-chat.Q4_K_M.gguf"
47
+
48
+ # Download the model if it doesn't exist
49
+ if not os.path.exists(model_path):
50
+ print(f"Downloading model to {model_path}...")
51
+ download_model(model_url, model_path)
52
+ print("Model downloaded successfully.")
53
+
54
+ # Initialize the Llama model
55
+ llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)
56
+
57
 
58
  def search_problems(query, top_k=5):
59
  dense_query = dense_model.encode([query])[0].tolist()
requirements.txt CHANGED
@@ -6,4 +6,5 @@ sentence-transformers==2.2.2
6
  pinecone-text
7
  accelerate
8
  optimum
9
- auto-gptq
 
 
6
  pinecone-text
7
  accelerate
8
  optimum
9
+ auto-gptq
10
+ llama-cpp-python