File size: 2,786 Bytes
5707fbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import List, Dict
import faiss

class ContractVectorStore:
    """In-memory semantic index over the textual terms of contracts.

    Every term is rendered as a sentence, embedded with the supplied model
    and added to a flat FAISS L2 index. ``texts[i]`` is the sentence whose
    embedding was added to the index at position ``i``.
    """

    def __init__(self, model: "SentenceTransformer"):
        """Create an empty store.

        Args:
            model: Encoder exposing ``encode(list_of_str)``; it is used for
                both the stored terms and the search queries.
        """
        self.model = model
        self.index = None  # built lazily on the first insert
        self.texts = []
        # Width of 'all-MiniLM-L6-v2' embeddings. Only a default: it is
        # replaced by the real embedding width when the index is created.
        self.dimension = 384

    def add_contract_terms(self, contract: Dict) -> None:
        """Render a contract's terms as sentences and add them to the store.

        Args:
            contract: Mapping holding a ``"terms"`` mapping. ``"base_rate"``
                is required; ``"volume_discounts"``, ``"tiered_pricing"``
                and ``"special_conditions"`` are optional.

        Raises:
            KeyError: If ``"terms"`` or ``"base_rate"`` is missing, or a
                discount/tier entry lacks one of the keys read below.
        """
        contract_terms = contract["terms"]
        terms = []

        # Volume discounts. ``:g`` keeps float noise out of the sentence
        # (0.15 * 100 would otherwise print as 15.000000000000002).
        for discount in contract_terms.get("volume_discounts") or []:
            percent = discount["discount"] * 100
            terms.append(
                f"Volume discount: {percent:g}% off for quantities >= {discount['threshold']}"
            )

        # Tiered pricing
        for tier in contract_terms.get("tiered_pricing") or []:
            terms.append(
                f"Tier {tier['tier']}: Rate multiplier of {tier['rate']}x base rate"
            )

        # Special conditions are free-form sentences and are stored verbatim.
        terms.extend(contract_terms.get("special_conditions") or [])

        # Base rate
        terms.append(f"Base rate is ${contract_terms['base_rate']} per unit")

        # Create embeddings and update index
        self._add_texts(terms)

    def _add_texts(self, texts: List[str]) -> None:
        """Embed ``texts`` and append them to the index and to ``self.texts``.

        Does nothing for an empty list. ``self.texts`` is extended only after
        the index accepted the vectors, so the two stay aligned.
        """
        if not texts:
            return

        # FAISS expects a C-contiguous float32 matrix.
        embeddings = np.ascontiguousarray(self.model.encode(texts), dtype="float32")

        if self.index is None:
            # Size the index from the actual embeddings so that models with
            # a width other than the default also work.
            self.dimension = int(embeddings.shape[1])
            self.index = faiss.IndexFlatL2(self.dimension)

        self.index.add(embeddings)
        self.texts.extend(texts)

    def search_relevant_terms(self, query: str, k: int = 3) -> List[Dict]:
        """Return up to ``k`` stored terms closest to ``query``.

        Args:
            query: Free-text query to embed and look up.
            k: Maximum number of results; values <= 0 yield no results.

        Returns:
            A list of ``{"text": str, "score": float}`` dicts in the order
            reported by the index, where ``score = 1 / (1 + distance)``.
            The list is empty when the store is empty.
        """
        if self.index is None or not self.texts or k <= 0:
            return []

        # Never ask for more neighbours than there are stored terms.
        k = min(k, len(self.texts))

        query_embedding = np.ascontiguousarray(
            self.model.encode([query])[0], dtype="float32"
        ).reshape(1, -1)

        distances, indices = self.index.search(query_embedding, k)

        results = []
        for dist, idx in zip(distances[0], indices[0]):
            idx = int(idx)
            # FAISS pads missing neighbours with label -1; without the lower
            # bound that would silently resolve to the last stored text.
            if 0 <= idx < len(self.texts):
                results.append({
                    "text": self.texts[idx],
                    "score": float(1 / (1 + dist)),  # distance -> similarity
                })

        return results