Spaces:
Sleeping
Sleeping
Ilyas KHIAT
committed on
Commit
·
eeaf024
1
Parent(s):
fe370a3
api first commit by me :)
Browse files
main.py
CHANGED
@@ -62,7 +62,8 @@ async def upload_file(file: UploadFile, enterprise_data: Json[EnterpriseData]):
|
|
62 |
|
63 |
# Assign a new UUID if id is not provided
|
64 |
if enterprise_data.id is None:
|
65 |
-
|
|
|
66 |
|
67 |
# Open the file with PyMuPDF
|
68 |
pdf_document = pymupdf.open(stream=contents, filetype="pdf")
|
|
|
62 |
|
63 |
# Assign a new UUID if id is not provided
|
64 |
if enterprise_data.id is None:
|
65 |
+
clean_name = remove_non_standard_ascii(enterprise_name)
|
66 |
+
enterprise_data.id = f"{clean_name}_{uuid4()}"
|
67 |
|
68 |
# Open the file with PyMuPDF
|
69 |
pdf_document = pymupdf.open(stream=contents, filetype="pdf")
|
rag.py
CHANGED
@@ -8,6 +8,13 @@ from langchain_openai import ChatOpenAI
|
|
8 |
from langchain_core.output_parsers import StrOutputParser
|
9 |
from langchain_core.prompts import PromptTemplate
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
|
13 |
def get_text_from_content_for_doc(content):
|
@@ -44,7 +51,8 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
|
|
44 |
page_content=chunk,
|
45 |
metadata={"filename":filename,"file_type":file_type},
|
46 |
)
|
47 |
-
|
|
|
48 |
uuids.append(uuid)
|
49 |
documents.append(document)
|
50 |
|
@@ -53,6 +61,7 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
|
|
53 |
return True
|
54 |
|
55 |
except Exception as e:
|
|
|
56 |
return False
|
57 |
|
58 |
def get_retreive_answer(enterprise_id,prompt,index):
|
@@ -70,6 +79,7 @@ def get_retreive_answer(enterprise_id,prompt,index):
|
|
70 |
return response
|
71 |
|
72 |
except Exception as e:
|
|
|
73 |
return False
|
74 |
|
75 |
def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini",context:str="",messages = []) :
|
|
|
8 |
from langchain_core.output_parsers import StrOutputParser
|
9 |
from langchain_core.prompts import PromptTemplate
|
10 |
|
11 |
+
import unicodedata
|
12 |
+
|
13 |
+
def remove_non_standard_ascii(input_string: str) -> str:
    """Return *input_string* reduced to a small ASCII-only character set.

    The text is first NFKD-normalized so that accented letters decompose
    into a base letter followed by combining marks (for example "é"
    becomes "e" + U+0301); the filter below then drops the marks.

    Only ASCII letters, ASCII digits, spaces and the punctuation
    characters ``. , ! ?`` are kept; every other character is removed.
    """
    normalized_string = unicodedata.normalize('NFKD', input_string)
    # Use explicit ASCII ranges: str.isdigit() is also true for non-ASCII
    # digits (e.g. Arabic-Indic), which would leak into the result.
    return ''.join(
        char
        for char in normalized_string
        if 'a' <= char <= 'z'
        or 'A' <= char <= 'Z'
        or '0' <= char <= '9'
        or char in ' .,!?'
    )
|
16 |
+
|
17 |
+
|
18 |
|
19 |
|
20 |
def get_text_from_content_for_doc(content):
|
|
|
51 |
page_content=chunk,
|
52 |
metadata={"filename":filename,"file_type":file_type},
|
53 |
)
|
54 |
+
clean_filename = remove_non_standard_ascii(file_name)
|
55 |
+
uuid = f"{clean_filename}_{i}"
|
56 |
uuids.append(uuid)
|
57 |
documents.append(document)
|
58 |
|
|
|
61 |
return True
|
62 |
|
63 |
except Exception as e:
|
64 |
+
print(e)
|
65 |
return False
|
66 |
|
67 |
def get_retreive_answer(enterprise_id,prompt,index):
|
|
|
79 |
return response
|
80 |
|
81 |
except Exception as e:
|
82 |
+
print(e)
|
83 |
return False
|
84 |
|
85 |
def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini",context:str="",messages = []) :
|