isayahc committed on
Commit
093c770
1 Parent(s): 0c1638d

setting up application

Browse files
Files changed (3) hide show
  1. dl_bin.py +32 -0
  2. ingest.py +12 -1
  3. utils.py +1 -0
dl_bin.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from typing import Optional
3
+
4
def download_binary_file(
    url: str,
    file_path: Optional[str] = None,
    timeout: float = 30.0,
) -> None:
    """
    Download a binary file from a given URL and save it to the specified path.

    The response body is streamed to disk in 8 KiB chunks, so the whole file
    is never held in memory. Request failures (connection errors, timeouts,
    non-2xx status codes) are reported on stdout instead of being raised.

    :param url: URL of the binary file to be downloaded.
    :param file_path: Local path to save the file. If None, the name is taken
        from the last component of the URL path (query string and fragment
        are ignored); "downloaded_file" is used when the URL has no usable
        file name.
    :param timeout: Seconds to wait for the server before giving up.
    :raises OSError: If the destination file cannot be opened or written.
    """
    # Imported locally so this function does not depend on module-level
    # imports beyond `requests` and `Optional`.
    from urllib.parse import unquote, urlsplit

    if file_path is None:
        # Only the path component names the file; splitting the raw URL would
        # keep "?query" / "#fragment" and yields "" for URLs ending in "/".
        candidate = unquote(urlsplit(url).path).rsplit("/", 1)[-1]
        file_path = candidate if candidate not in ("", ".", "..") else "downloaded_file"

    try:
        # The context manager releases the connection even if writing fails;
        # without a timeout, requests may block indefinitely.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(file_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading file: {e}")
    else:
        print(f"File downloaded successfully: {file_path}")
26
+
27
import urllib.request

# Example usage: the call to download_binary_file is left disabled; the file
# is fetched with urllib instead and written to "filename.pdf" in the current
# working directory. This runs whenever the module is executed or imported.
url = "https://llamahack.slack.com/files/U069A8NRB9T/F068ZTLK9KR/anthem_hsa_medical_insurance_benefit_booklet.pdf"
# download_binary_file(url)

urllib.request.urlretrieve(url=url, filename="filename.pdf")
ingest.py CHANGED
@@ -1,5 +1,11 @@
1
  from llama_index import SimpleDirectoryReader, VectorStoreIndex, ServiceContext
2
  from llama_index.text_splitter import SentenceSplitter
 
 
 
 
 
 
3
 
4
  documents = SimpleDirectoryReader("./data").load_data()
5
 
@@ -10,11 +16,16 @@ index = VectorStoreIndex.from_documents(
10
  documents, service_context=service_context
11
  )
12
 
13
- from llama_index.query import QueryBuilder
 
 
 
14
 
15
  # Define the query text
16
  query_text = "How does the weather affect crop growth?"
17
 
 
 
18
  # Preprocess the query text
19
  query_builder = QueryBuilder(service_context)
20
  query = query_builder.build_query(query_text)
 
1
  from llama_index import SimpleDirectoryReader, VectorStoreIndex, ServiceContext
2
  from llama_index.text_splitter import SentenceSplitter
3
+ import dotenv
4
+ import os
5
+
6
+ dotenv.load_dotenv()
7
+
8
+
9
 
10
  documents = SimpleDirectoryReader("./data").load_data()
11
 
 
16
  documents, service_context=service_context
17
  )
18
 
19
+ query_engine = index.as_query_engine()
20
+
21
+
22
+ # from llama_index.query import QueryBuilder
23
 
24
  # Define the query text
25
  query_text = "How does the weather affect crop growth?"
26
 
27
+ data = query_engine.query(query)
28
+
29
  # Preprocess the query text
30
  query_builder = QueryBuilder(service_context)
31
  query = query_builder.build_query(query_text)
utils.py CHANGED
@@ -2,6 +2,7 @@ import requests
2
  import os
3
  from typing import Optional
4
  from urllib.parse import urlparse, unquote
 
5
 
6
  def get_filename_from_url(url: str, cd: Optional[str]) -> str:
7
  """
 
2
  import os
3
  from typing import Optional
4
  from urllib.parse import urlparse, unquote
5
+ import gdrive
6
 
7
  def get_filename_from_url(url: str, cd: Optional[str]) -> str:
8
  """