Carlos Salgado committed on
Commit
bbe64b5
1 Parent(s): b10d0e6

update flake, fix ingest streamlit compatibility bug

Browse files
Files changed (2) hide show
  1. backend/generate_metadata.py +9 -8
  2. flake.nix +11 -3
backend/generate_metadata.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import argparse
3
  import json
4
  import openai
@@ -12,13 +13,13 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
12
  load_dotenv()
13
 
14
 
15
- def ingest(file_path):
16
- extension = file_path.split('.')[-1]
17
- ext = extension.lower()
18
- if ext == 'pdf':
19
- loader = UnstructuredPDFLoader(file_path)
20
- elif ext == 'txt':
21
- loader = TextLoader(file_path)
22
  else:
23
  raise NotImplementedError('Only .txt or .pdf files are supported')
24
 
@@ -29,7 +30,7 @@ def ingest(file_path):
29
  "\n\n",
30
  "\n",
31
  " ",
32
- ",",
33
  "\uff0c", # Fullwidth comma
34
  "\u3001", # Ideographic comma
35
  "\uff0e", # Fullwidth full stop
 
1
  import os
2
+ import io
3
  import argparse
4
  import json
5
  import openai
 
13
  load_dotenv()
14
 
15
 
16
+ import io
17
+
18
+ def ingest(file_obj, file_ext='pdf'):
19
+ if file_ext == 'pdf':
20
+ loader = UnstructuredPDFLoader(file_obj)
21
+ elif file_ext == 'txt':
22
+ loader = TextLoader(file_obj)
23
  else:
24
  raise NotImplementedError('Only .txt or .pdf files are supported')
25
 
 
30
  "\n\n",
31
  "\n",
32
  " ",
33
+ ",",
34
  "\uff0c", # Fullwidth comma
35
  "\u3001", # Ideographic comma
36
  "\uff0e", # Fullwidth full stop
flake.nix CHANGED
@@ -14,6 +14,9 @@
14
  devShells.${system}.default = pkgs.mkShell {
15
  packages = [
16
  (pkgs.python311.withPackages (python-pkgs: [
 
 
 
17
  python-pkgs.numpy
18
  python-pkgs.pandas
19
  python-pkgs.scipy
@@ -23,15 +26,20 @@
23
  python-pkgs.langchain
24
  python-pkgs.langchain-text-splitters
25
  python-pkgs.unstructured
 
 
 
 
 
 
 
 
26
  python-pkgs.openai
27
  python-pkgs.pydantic
28
  python-pkgs.python-dotenv
29
  python-pkgs.configargparse
30
  python-pkgs.streamlit
31
- python-pkgs.pip
32
  python-pkgs.lark
33
- python-pkgs.jupyter
34
- python-pkgs.notebook
35
  python-pkgs.sentence-transformers
36
  pkgs.unstructured-api
37
  ]))
 
14
  devShells.${system}.default = pkgs.mkShell {
15
  packages = [
16
  (pkgs.python311.withPackages (python-pkgs: [
17
+ python-pkgs.pip # VsCode starts
18
+ python-pkgs.jupyter
19
+ python-pkgs.notebook # VsCode ends
20
  python-pkgs.numpy
21
  python-pkgs.pandas
22
  python-pkgs.scipy
 
26
  python-pkgs.langchain
27
  python-pkgs.langchain-text-splitters
28
  python-pkgs.unstructured
29
+ python-pkgs.wrapt # unstructured[local-inference] starts
30
+ python-pkgs.iso-639
31
+ python-pkgs.emoji
32
+ python-pkgs.pillow-heif
33
+ python-pkgs.magic
34
+ python-pkgs.poppler-qt5
35
+ python-pkgs.pytesseract
36
+ python-pkgs.langdetect # unstructured[local-inference] ends
37
  python-pkgs.openai
38
  python-pkgs.pydantic
39
  python-pkgs.python-dotenv
40
  python-pkgs.configargparse
41
  python-pkgs.streamlit
 
42
  python-pkgs.lark
 
 
43
  python-pkgs.sentence-transformers
44
  pkgs.unstructured-api
45
  ]))