Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,8 +18,52 @@ from langchain_core.output_parsers import StrOutputParser
|
|
| 18 |
from langchain_huggingface import HuggingFaceEndpoint
|
| 19 |
from dotenv import load_dotenv
|
| 20 |
load_dotenv()
|
| 21 |
-
|
| 22 |
HF_token = os.environ["HF_TOKEN"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
#process_pdf()
|
| 24 |
|
| 25 |
|
|
|
|
| 18 |
from langchain_huggingface import HuggingFaceEndpoint
|
| 19 |
from dotenv import load_dotenv
|
| 20 |
load_dotenv()
|
|
|
|
| 21 |
# Token read from the HF_TOKEN environment variable (presumably the Hugging Face
# API token -- confirm). Indexing os.environ raises KeyError if it is unset.
HF_token = os.environ["HF_TOKEN"]
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# -------------------------------------------------------------
|
| 25 |
+
# Functions
|
| 26 |
+
# -------------------------------------------------------------
|
| 27 |
+
def make_html_source(source, i):
    """
    Render one source document as an HTML card for display in the "source" side tab.

    Args:
        source: Object exposing ``page_content`` (text) and ``metadata`` (a mapping
            with the keys ``source``, ``file_path`` and ``page``).
        i: Document number; used for the card's ``id`` (``doc{i}``) and its heading.

    Returns:
        str: HTML markup for the card.

    Raises:
        KeyError: If ``source``, ``file_path`` or ``page`` is missing from the metadata.
    """
    # Imported locally so this function does not rely on a file-level import.
    from html import escape

    meta = source.metadata

    # Text and metadata come from the document, not from us: escape them so
    # characters such as "<", "&" or quotes cannot break or inject markup.
    content = escape(source.page_content.strip())
    name = escape(str(meta['source']))
    file_path = escape(str(meta['file_path']))
    page = int(meta['page'])

    card = f"""
<div class="card" id="doc{i}">
<div class="card-content">
<h2>Doc {i} - {file_path} - Page {page}</h2>
<p>{content}</p>
</div>
<div class="card-footer">
<span>{name}</span>
<a href="{file_path}#page={page}" target="_blank" class="pdf-link">
<span role="img" aria-label="Open PDF">🔗</span>
</a>
</div>
</div>
"""

    return card
|
| 52 |
+
|
| 53 |
+
def parse_output_llm_with_sources(output):
    """
    Replace "[Doc X]" / "[Doc X, Doc Y]" references with superscript anchor links.

    Each referenced document number becomes a link targeting ``#doc{number}``.
    Text outside bracketed references is returned untouched, including text
    that merely happens to start with "Doc".

    Args:
        output (str): Text containing zero or more bracketed document references.

    Returns:
        str: The text with every reference replaced by HTML anchors.
    """
    def _refs_to_links(match):
        # One anchor per document number inside the brackets, concatenated
        # without a separator.
        numbers = re.findall(r"\d+", match.group(1))
        return "".join(
            f"""<a href="#doc{number}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{number}</sup></span></a>"""
            for number in numbers
        )

    # Only the regex matches are rewritten; deciding by a "Doc" prefix would
    # also mangle ordinary text segments that begin with "Doc".
    return re.sub(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', _refs_to_links, output)
|
| 67 |
#process_pdf()
|
| 68 |
|
| 69 |
|