ppsingh committed on
Commit
a49b43b
1 Parent(s): c9fc9f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -1
app.py CHANGED
@@ -18,8 +18,52 @@ from langchain_core.output_parsers import StrOutputParser
18
  from langchain_huggingface import HuggingFaceEndpoint
19
  from dotenv import load_dotenv
20
  load_dotenv()
21
-
22
  HF_token = os.environ["HF_TOKEN"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  #process_pdf()
24
 
25
 
 
18
  from langchain_huggingface import HuggingFaceEndpoint
19
  from dotenv import load_dotenv
20
  load_dotenv()
 
21
  HF_token = os.environ["HF_TOKEN"]
22
+
23
+
24
+ # -------------------------------------------------------------
25
+ # Functions
26
+ # -------------------------------------------------------------
27
def make_html_source(source, i):
    """
    Render one document as an HTML card for display in the "source" side tab.

    Args:
        source: object exposing ``page_content`` (str) and ``metadata`` (a
            mapping read for the keys ``source``, ``file_path`` and ``page``).
        i: index of the document; used in the card heading and in the
            ``id="doc{i}"`` anchor of the card.

    Returns:
        str: HTML markup of the card.

    Raises:
        KeyError: if ``source``, ``file_path`` or ``page`` is missing from
            the metadata.
    """
    # Local import keeps this function self-contained with respect to the
    # module-level import list.
    from html import escape

    meta = source.metadata

    # The text and metadata are interpolated into markup below; escape them so
    # characters such as <, & or " cannot break the card or inject HTML.
    content = escape(source.page_content.strip())
    name = escape(str(meta['source']))
    file_path = escape(str(meta['file_path']))
    page = int(meta['page'])

    card = f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {file_path} - Page {page}</h2>
            <p>{content}</p>
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{file_path}#page={page}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">🔗</span>
            </a>
        </div>
    </div>
    """

    return card
52
+
53
def parse_output_llm_with_sources(output):
    """
    Replace "[Doc X]" style references in the text with HTML anchor links.

    A bracketed reference may list several documents, e.g. "[Doc 1, Doc 2]";
    each one becomes its own superscript link pointing at ``#doc<number>``.
    Text outside the bracketed references is returned unchanged.

    Args:
        output: text possibly containing "[Doc X]" references.

    Returns:
        str: the text with every bracketed reference replaced by anchor tags.
    """
    def _render_refs(match):
        # group(1) is the reference list without the surrounding brackets,
        # e.g. "Doc 1, Doc 2"; reduce each entry to its bare number.
        numbers = [
            ref.lower().replace("doc", "").strip()
            for ref in match.group(1).split(",")
        ]
        return "".join(
            f"""<a href="#doc{number}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{number}</sup></span></a>"""
            for number in numbers
        )

    # Substituting on the regex match (rather than splitting and testing each
    # piece with startswith("Doc")) ensures that ordinary text which merely
    # begins with "Doc" is never mistaken for a reference.
    return re.sub(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', _render_refs, output)
67
  #process_pdf()
68
 
69