File size: 1,628 Bytes
d2b1491
 
9e2a8ba
 
 
 
 
 
 
 
 
 
 
 
d2b1491
 
 
 
 
 
 
 
9e2a8ba
d2b1491
9e2a8ba
 
d2b1491
 
 
 
 
8c33239
 
 
d2b1491
 
9e2a8ba
 
 
 
 
 
 
 
 
 
 
 
 
c09c66e
9e2a8ba
 
 
 
 
8c33239
9e2a8ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2b1491
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from enum import StrEnum

from pydantic import BaseModel
from sqlalchemy import Column
from sqlalchemy import Float
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.ext.declarative import declarative_base

from settings import CLUSTER_TOP_WORDS_TABLE_NAME
from settings import METADATA_TABLE_NAME

# Declarative base class shared by the SQLAlchemy ORM models defined in this module.
# NOTE(review): SQLAlchemy 1.4+ documents `sqlalchemy.orm.declarative_base` as the
# current import location; the `sqlalchemy.ext.declarative` path used by this file is
# the legacy one -- confirm the pinned SQLAlchemy version before switching.
Base = declarative_base()


class VectorType(StrEnum):
    """String-valued enumeration of the vector types a search request can name.

    Each member's value is identical to its name, so members compare equal to the
    plain strings ``"dense"``, ``"sparse"`` and ``"hybrid"``.
    """
    dense = "dense"
    sparse = "sparse"
    hybrid = "hybrid"


class SearchRequestVector(BaseModel):
    """Pydantic model holding the parameters of a vector search request.

    Only ``input_text`` is required; every other field has a default, and
    ``vector_type`` defaults to ``VectorType.dense``.

    NOTE(review): this model only declares the fields. How they are applied (for
    example whether the year bounds are inclusive, or what ``score_threshold_dense``
    is compared against) is decided by the code that consumes the request.
    """
    input_text: str
    limit: int = 2000
    min_year: int | None = None
    # NOTE(review): hard-coded default year; presumably needs revisiting as newer
    # records are added -- confirm with the consumer of this model.
    max_year: int | None = 2025
    score_threshold_dense: float | None = 0.7
    vector_type: VectorType = VectorType.dense


class SearchRequestHybrid(SearchRequestVector):
    """Search request variant whose ``vector_type`` defaults to ``VectorType.hybrid``.

    Inherits every field of ``SearchRequestVector``, lowers the default ``limit``
    from 2000 to 50, and adds two extra limits: ``limit_dense`` (default 500) and
    ``limit_sparse`` (default 50).

    NOTE(review): presumably the two extra limits cap the dense and sparse candidate
    sets before they are combined -- confirm against the search implementation.
    """
    limit: int = 50
    limit_dense: int = 500
    limit_sparse: int = 50
    vector_type: VectorType = VectorType.hybrid


class SemanticSearchResults(BaseModel):
    """Pydantic model pairing a ``doi`` string with a float ``score``.

    NOTE(review): despite the plural name, one instance holds a single doi/score
    pair; the score's scale and ordering are not defined in this module.
    """
    doi: str
    score: float


class MetadataPosition(BaseModel):
    """Pydantic model for one record's metadata together with an ``x``/``y`` pair.

    All fields are required. The field names match the columns of ``MetadataDB``
    except ``scholar_link``, which is added by the ``MetadataFull`` subclass.

    NOTE(review): ``x``/``y`` are presumably 2-D coordinates (per the class name)
    and ``cluster`` presumably matches ``ClusterWordsDB.cluster`` -- confirm.
    """
    doi: str
    cluster: str
    x: float
    y: float
    title: str
    year: int
    abstract: str  # TODO: Can we have everything in memory?


class MetadataFull(MetadataPosition):
    """``MetadataPosition`` extended with a required ``scholar_link`` string.

    With this extra field the model has one field per ``MetadataDB`` column.
    """
    scholar_link: str


class MetadataDB(Base):
    """SQLAlchemy ORM model for the metadata table.

    The table name is taken from ``settings.METADATA_TABLE_NAME``. Rows are keyed
    by ``doi``; no other column declares constraints or string lengths.
    """
    __tablename__ = METADATA_TABLE_NAME
    # Primary key: one row per DOI.
    doi = Column(String, primary_key=True)
    title = Column(String)
    abstract = Column(String)
    cluster = Column(String)
    year = Column(Integer)
    x = Column(Float)
    y = Column(Float)
    scholar_link = Column(String)


class ClusterWordsDB(Base):
    """SQLAlchemy ORM model for the cluster top-words table.

    The table name is taken from ``settings.CLUSTER_TOP_WORDS_TABLE_NAME``. Rows
    are keyed by ``cluster``.
    """
    __tablename__ = CLUSTER_TOP_WORDS_TABLE_NAME
    # Primary key: one row per cluster identifier.
    cluster = Column(String, primary_key=True)
    # NOTE(review): stored as a single string; the delimiter/serialization of the
    # individual words is not visible here -- confirm with the code that writes it.
    top_words = Column(String)