Spaces:
Running
Running
File size: 1,628 Bytes
d2b1491 9e2a8ba d2b1491 9e2a8ba d2b1491 9e2a8ba d2b1491 8c33239 d2b1491 9e2a8ba c09c66e 9e2a8ba 8c33239 9e2a8ba d2b1491 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
from enum import StrEnum
from pydantic import BaseModel
from sqlalchemy import Column
from sqlalchemy import Float
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.ext.declarative import declarative_base
from settings import CLUSTER_TOP_WORDS_TABLE_NAME
from settings import METADATA_TABLE_NAME
# Shared declarative base; the SQLAlchemy table classes defined in this
# module (MetadataDB, ClusterWordsDB) subclass it.
# NOTE(review): `sqlalchemy.ext.declarative.declarative_base` is the legacy
# import location (SQLAlchemy 1.4+ provides `sqlalchemy.orm.declarative_base`)
# — confirm the installed SQLAlchemy version before switching the import.
Base = declarative_base()
class VectorType(StrEnum):
dense = "dense"
sparse = "sparse"
hybrid = "hybrid"
# Pydantic model holding the parameters of a vector search request.
# Only `input_text` is required; every other field carries a default.
# NOTE(review): the per-field meanings below are inferred from the field
# names — confirm against the code that consumes this model.
class SearchRequestVector(BaseModel):
    input_text: str  # presumably the free-text query
    limit: int = 2000  # presumably the maximum number of hits returned
    min_year: int | None = None  # no lower year bound by default
    # NOTE(review): hard-coded year default — confirm it is kept up to date.
    max_year: int | None = 2025
    score_threshold_dense: float | None = 0.7
    vector_type: VectorType = VectorType.dense
# Search request variant whose `vector_type` defaults to
# `VectorType.hybrid`. It inherits every field of SearchRequestVector,
# lowers the default `limit` to 50 and adds two extra limits.
# NOTE(review): `limit_dense` / `limit_sparse` look like per-retriever
# candidate counts — confirm against the search code.
class SearchRequestHybrid(SearchRequestVector):
    limit: int = 50
    limit_dense: int = 500
    limit_sparse: int = 50
    vector_type: VectorType = VectorType.hybrid
# Pydantic model pairing a DOI string with a float score.
# NOTE(review): presumably one instance per search hit — confirm with the
# code that builds these.
class SemanticSearchResults(BaseModel):
    doi: str
    score: float
# Pydantic model for one document's metadata together with an (x, y)
# position and a cluster label. All fields are required.
# NOTE(review): x / y are presumably 2-D layout coordinates — confirm.
class MetadataPosition(BaseModel):
    doi: str
    cluster: str
    x: float
    y: float
    title: str
    year: int
    abstract: str  # TODO: Can we have everything in memory?
# MetadataPosition extended with one additional required string field,
# `scholar_link`.
class MetadataFull(MetadataPosition):
    scholar_link: str
# SQLAlchemy declarative model for the table named by METADATA_TABLE_NAME.
# `doi` is the primary key; the column names match the fields of the
# MetadataFull pydantic model defined in this module.
class MetadataDB(Base):
    __tablename__ = METADATA_TABLE_NAME

    doi = Column(String, primary_key=True)
    title = Column(String)
    abstract = Column(String)
    cluster = Column(String)
    year = Column(Integer)
    x = Column(Float)
    y = Column(Float)
    scholar_link = Column(String)
# SQLAlchemy declarative model for the table named by
# CLUSTER_TOP_WORDS_TABLE_NAME, keyed by `cluster`.
# NOTE(review): `top_words` is a single String column — presumably a
# serialized or delimited word list; confirm the format with the writer.
class ClusterWordsDB(Base):
    __tablename__ = CLUSTER_TOP_WORDS_TABLE_NAME

    cluster = Column(String, primary_key=True)
    top_words = Column(String)
|