Spaces:
Running
Running
File size: 2,644 Bytes
a0e37e2 cc80c3d f86d7f2 cc80c3d a0e37e2 f86d7f2 a0e37e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from typing import Dict, Any
from langchain_core.documents import Document
from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult
from ask_candid.retrieval.sources.utils import get_context
# Source configuration for the Candid blog: names the search index and the
# text fields handed to ElasticSourceConfig for this source.
CandidBlogConfig = ElasticSourceConfig(
    index_name="search-semantic-candid-blog",
    text_fields=("content", "authors_text", "title_summary_tags"),
)
def process_blog_hit(hit: ElasticHitsResult) -> Document:
    """Turn a Candid blog search hit into a ``Document``.

    The page content is the title, excerpt, content, authors and tags of the
    hit joined by blank lines, in that order. The metadata carries the title,
    the fixed source label ``"Candid Blog"``, and the hit's ``id`` and
    ``link`` values.

    Args:
        hit: Search hit whose ``source`` mapping holds the blog post fields.

    Returns:
        A ``Document`` built from the hit.

    Raises:
        KeyError: If the hit's source has no ``id`` or ``link`` entry.
    """
    source = hit.source
    title = source.get("title", "")
    excerpt = source.get("excerpt", "")

    # Only the long text fields go through get_context; short fields are read
    # straight from the source mapping.
    # NOTE(review): get_context is defined elsewhere; presumably it returns a
    # string for the named field - confirm against its implementation.
    body_text = get_context("content", hit, context_length=12, add_context=False)
    author_text = get_context("authors_text", hit, context_length=12, add_context=False)
    tag_text = source.get("title_summary_tags", "")

    # Build the page content before touching the required metadata keys so the
    # evaluation order matches the single-expression form.
    sections = [title, excerpt, body_text, author_text, tag_text]
    page_content = "\n\n".join(sections)

    metadata = {
        "title": title,
        "source": "Candid Blog",
        "source_id": source["id"],
        "url": source["link"],
    }
    return Document(page_content=page_content, metadata=metadata)
def build_card_html(doc: Dict[str, Any], height_px: int = 200, show_chunks=False) -> str:
    """Render a Candid blog post as a small HTML card.

    The card shows a "Candid blog post:" label, the post title as a link to
    ``doc['link']``, and the excerpt. The title and excerpt share a budget of
    999 characters: text that would exceed the budget is cut and suffixed
    with ``[...]``, and once the budget is spent only ``[...]`` is shown.
    Only the excerpt is displayed in its truncated form; the title is always
    rendered in full but still counts against the budget.

    Args:
        doc: Blog post record. ``link`` is required; ``title`` and ``excerpt``
            are optional and treated as empty when missing or ``None``.
        height_px: Height of the card in pixels.
        show_chunks: Currently unused; kept so existing callers keep working.

    Returns:
        The HTML markup for the card.

    Raises:
        KeyError: If ``doc`` has no ``link`` entry.
    """
    max_chars = 999  # combined character budget for title + excerpt

    url = f"{doc['link']}"

    # NOTE(review): values are interpolated into the markup unescaped;
    # confirm the indexed blog content is trusted / already HTML-safe.
    rendered = {}
    used_chars = 0
    for field in ("title", "excerpt"):
        value = doc.get(field)
        if value is None:
            rendered[field] = ""
            rendered[field + "_txt"] = ""
            continue

        rendered[field] = value
        remaining = max_chars - used_chars
        if len(value) <= remaining:
            rendered[field + "_txt"] = f"<div>{value}</div>"
        elif remaining > 0:
            rendered[field + "_txt"] = f"<div>{value[:remaining]}[...]</div>"
        else:
            rendered[field + "_txt"] = "<span>[...]</span>"
        used_chars += len(value)

    # Use the normalised title so a missing or None title renders as empty
    # text instead of raising KeyError or printing "None".
    title = rendered["title"]
    excerpt_html = rendered["excerpt_txt"]

    html = f"""
<div style='height: {height_px}px; padding: 5px;'>
<div style='height: {height_px}px; border: 1px solid #febe10;'>
<span style='padding-left: 10px; display: inline-block; width: 100%;'>
<div>
<span>
<b>Candid blog post:</b>
<a href='{url}' target='_blank' style='text-decoration: none;'>
{title}
</a>
</span>
<br>
<br>
{excerpt_html}
</div>
</span>
</div>
</div>
"""
    return html
|