File size: 2,644 Bytes
a0e37e2
cc80c3d
f86d7f2
 
 
 
cc80c3d
 
 
 
 
a0e37e2
 
f86d7f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0e37e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from typing import Dict, Any

from langchain_core.documents import Document

from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult
from ask_candid.retrieval.sources.utils import get_context

# Source configuration for the Candid blog: names the index and the document
# fields handed to ElasticSourceConfig as text fields.
# NOTE(review): presumably `text_fields` are the fields searched for matches;
# confirm against ElasticSourceConfig.
CandidBlogConfig = ElasticSourceConfig(
    index_name="search-semantic-candid-blog",
    text_fields=("content", "authors_text", "title_summary_tags")
)


def process_blog_hit(hit: ElasticHitsResult) -> Document:
    """Turn a Candid blog search hit into a ``Document``.

    The page content is the title, excerpt, content, authors and tags of the
    hit, in that order, separated by blank lines. Title, excerpt and tags are
    read straight from ``hit.source`` (defaulting to an empty string when the
    key is absent); content and authors go through ``get_context``.

    Args:
        hit: Search hit whose ``source`` mapping holds the blog post fields.

    Returns:
        A ``Document`` whose metadata carries the title, the fixed source
        label, the post id and its URL.

    Raises:
        KeyError: If ``hit.source`` has no ``id`` or ``link`` entry.
    """
    source = hit.source
    post_title = source.get("title", "")
    post_excerpt = source.get("excerpt", "")

    # Only the long text fields are routed through get_context.
    body_text = get_context("content", hit, context_length=12, add_context=False)
    author_text = get_context("authors_text", hit, context_length=12, add_context=False)
    tag_text = source.get("title_summary_tags", "")

    sections = [post_title, post_excerpt, body_text, author_text, tag_text]
    page_content = '\n\n'.join(sections)
    metadata = {
        "title": post_title,
        "source": "Candid Blog",
        "source_id": source["id"],
        "url": source["link"],
    }
    return Document(page_content=page_content, metadata=metadata)


def build_card_html(doc: Dict[str, Any], height_px: int = 200, show_chunks=False) -> str:
    """Render a Candid blog post as an HTML card.

    The card shows the post title as a link followed by the excerpt. Title and
    excerpt share a budget of 999 characters: the title is always shown in
    full, while the excerpt is cut to whatever budget the title left over and
    suffixed with ``[...]`` (or replaced by ``[...]`` alone when nothing is
    left).

    Args:
        doc: Blog post record. ``link`` is required; ``title`` and ``excerpt``
            are optional and render as empty text when missing or ``None``.
        height_px: Height in pixels applied to the card's outer containers.
        show_chunks: Unused; kept so existing callers keep working.

    Returns:
        The card's HTML markup.

    Raises:
        KeyError: If ``doc`` has no ``link`` key.
    """
    max_chars = 999  # combined character budget for title + excerpt
    url = f"{doc['link']}"

    fields_dict = {}
    used_chars = 0
    for field in ("title", "excerpt"):
        value = doc.get(field)
        if value is None:
            # Missing and explicit-None fields both render as nothing.
            fields_dict[field] = ""
            fields_dict[field + "_txt"] = ""
            continue

        fields_dict[field] = value
        remaining = max_chars - used_chars
        if len(value) <= remaining:
            fields_dict[field + "_txt"] = f"<div>{value}</div>"
        elif remaining > 0:
            fields_dict[field + "_txt"] = f"<div>{value[:remaining] + '[...]'}</div>"
        else:
            fields_dict[field + "_txt"] = "<span>[...]</span>"
        used_chars += len(value)

    # Use the normalised title so a missing/None title yields an empty link
    # label instead of a KeyError or the literal text "None".
    title = fields_dict["title"]
    excerpt_html = fields_dict["excerpt_txt"]
    html = f"""
    <div style='height: {height_px}px; padding: 5px;'>
        <div style='height: {height_px}px; border: 1px solid #febe10;'>
            <span style='padding-left: 10px; display: inline-block; width: 100%;'>
                <div>
                    <span>
                        <b>Candid blog post:</b>
                        <a href='{url}' target='_blank' style='text-decoration: none;'>
                            {title} 
                        </a>
                    </span>
                    <br>
                    <br>
                    {excerpt_html}
                </div>
            </span>
        </div>
    </div>
    """
    return html