Samarth991 committed on
Commit
5f2768f
1 Parent(s): e433400

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -7,7 +7,7 @@ from sentence_transformers import SentenceTransformer
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain.prompts import PromptTemplate
9
  from langchain_community.llms.huggingface_hub import HuggingFaceHub
10
- from read_photodocument import convert_PDF_to_Text
11
  from doctr.io import DocumentFile
12
  from doctr.models import ocr_predictor
13
  import contextlib
@@ -61,6 +61,7 @@ def summarize_data(docs,llm_model,chain_type='refine'):
61
 
62
  prompt = PromptTemplate.from_template(prompt_template)
63
  refine_prompt = PromptTemplate.from_template(refine_template)
 
64
 
65
  chain = load_summarize_chain(llm=llm_model,
66
  chain_type=chain_type,
@@ -76,6 +77,7 @@ def summarize_data(docs,llm_model,chain_type='refine'):
76
  consice_sumary = re.search("CONCISE SUMMARY:.*\.*$", output_text).group(0)
77
  dash_id = consice_sumary.find('-')
78
  return consice_sumary[:dash_id].replace(' ','\n')
 
79
  # matches = re.finditer(regex, output_text, re.DOTALL)
80
  # for matchNum, match in enumerate(matches, start=1):
81
  # for groupNum in range(0, len(match.groups())):
@@ -115,6 +117,15 @@ def document_loader(temperature,max_tokens,api_key,model_name,file_path):
115
  was_truncated = conversion_stats["truncated"]
116
  print("Converted text {}\nNum Pages;{}".format(converted_txt,num_pages))
117
 
 
 
 
 
 
 
 
 
 
118
  if converted_txt:
119
  print("Document Processed ..")
120
  texts = process_documents(texts=converted_txt)
 
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain.prompts import PromptTemplate
9
  from langchain_community.llms.huggingface_hub import HuggingFaceHub
10
+ from read_photodocument import convert_PDF_to_Text,convert_image_to_pdf
11
  from doctr.io import DocumentFile
12
  from doctr.models import ocr_predictor
13
  import contextlib
 
61
 
62
  prompt = PromptTemplate.from_template(prompt_template)
63
  refine_prompt = PromptTemplate.from_template(refine_template)
64
+
65
 
66
  chain = load_summarize_chain(llm=llm_model,
67
  chain_type=chain_type,
 
77
  consice_sumary = re.search("CONCISE SUMMARY:.*\.*$", output_text).group(0)
78
  dash_id = consice_sumary.find('-')
79
  return consice_sumary[:dash_id].replace(' ','\n')
80
+
81
  # matches = re.finditer(regex, output_text, re.DOTALL)
82
  # for matchNum, match in enumerate(matches, start=1):
83
  # for groupNum in range(0, len(match.groups())):
 
117
  was_truncated = conversion_stats["truncated"]
118
  print("Converted text {}\nNum Pages;{}".format(converted_txt,num_pages))
119
 
120
+ elif file_path.endswith('.jpg') or file_path.endswith('.jpeg'):
121
+ conversion_stats = convert_image_to_pdf(file_path,model)
122
+ converted_txt = conversion_stats["converted_text"]
123
+ num_pages = conversion_stats["num_pages"]
124
+ was_truncated = conversion_stats["truncated"]
125
+ print("Converted text {}\nNum Pages;{}".format(converted_txt,num_pages))
126
+
127
+ else:
128
+ return ("Invalid Format ....")
129
  if converted_txt:
130
  print("Document Processed ..")
131
  texts = process_documents(texts=converted_txt)