File size: 4,666 Bytes
2c3629a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import html

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

# Load the listings from the CSV file next to the script.
# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so the CSV is re-read and the model re-fitted each time; consider wrapping
# this section in a Streamlit cache decorator if the installed version has one.
dataset = pd.read_csv('avito_cars.csv')

# Build the searchable 'content' text from the descriptive columns.
# Missing values are replaced by '' and everything is cast to str first:
# otherwise a single NaN turns the whole concatenation into NaN (which
# TfidfVectorizer rejects) and a non-string column cannot be joined with " ".
_content_parts = dataset[
    ['Marque', 'Modèle', 'Type de carburant', 'Boite de vitesses']
].fillna('').astype(str)
dataset['content'] = (
    _content_parts['Marque'] + " " +
    _content_parts['Modèle'] + " " +
    _content_parts['Type de carburant'] + " " +
    _content_parts['Boite de vitesses']
)
# Collapse the stray spaces left behind by empty fields.
dataset['content'] = dataset['content'].str.split().str.join(' ')
# Rows without any descriptive text can never match a query.
dataset = dataset[dataset['content'] != '']
dataset = dataset.drop_duplicates(subset=['content'])  # Remove duplicates

# TF-IDF representation of every listing's content text.
vectorizer = TfidfVectorizer(stop_words=None)
tfidf_matrix = vectorizer.fit_transform(dataset['content'])

# Number of LSI dimensions: at most 50, but kept below the vocabulary size
# because TruncatedSVD refuses n_components >= n_features in some versions.
n_components = max(1, min(50, tfidf_matrix.shape[1] - 1))
# A fixed seed keeps the LSI space (and therefore the ranking) identical
# across script reruns, so paginated results stay consistent between clicks.
svd = TruncatedSVD(n_components=n_components, random_state=42)
lsi_matrix = svd.fit_transform(tfidf_matrix)
# L2-normalise the rows (sklearn's default norm for `normalize`).
lsi_matrix = normalize(lsi_matrix)

# Search function
def search(query, top_n=100):
    """Rank the indexed listings against a free-text query in LSI space.

    Args:
        query: Free-text search string; ``None`` is treated as an empty query.
        top_n: Maximum number of listings to return.

    Returns:
        A ``(results, scores)`` tuple: ``results`` holds the matching rows of
        ``dataset`` ordered from most to least similar, and ``scores`` is the
        array of cosine similarities aligned with those rows. Both are empty
        when ``top_n`` is not positive or when no word of the query belongs
        to the fitted vocabulary.
    """
    query_text = "" if query is None else str(query)
    query_tfidf = vectorizer.transform([query_text])

    # An all-zero TF-IDF vector (blank query or only unknown words) has a
    # similarity of 0 with every listing, so any "ranking" would be arbitrary.
    # A non-positive top_n must also be rejected here: the slice [-0:] below
    # would otherwise select every row instead of none.
    if top_n <= 0 or query_tfidf.nnz == 0:
        return dataset.iloc[0:0], np.empty(0, dtype=float)

    # Project the query into the same normalised LSI space as the listings.
    query_lsi = normalize(svd.transform(query_tfidf))
    similarities = cosine_similarity(query_lsi, lsi_matrix).flatten()

    # argsort is ascending: keep the last top_n positions, best match first.
    top_indices = similarities.argsort()[-top_n:][::-1]
    return dataset.iloc[top_indices], similarities[top_indices]

# Streamlit Interface
st.title("Moteur de recherche de voitures basé sur le LSI (Latent Semantic Indexing)")
st.write("Recherchez des voitures en utilisant des mots-clés (par ex. : 'Peugeot Diesel Manuelle').")

# User input
query = st.text_input("Entrez votre requête de recherche :")
top_n = st.slider("Nombre de résultats à afficher par page :", min_value=3, max_value=12, step=3, value=6)

# Pagination state: the current page survives script reruns in session_state.
if "page" not in st.session_state:
    st.session_state.page = 1

# A page number only makes sense for the query and page size it was reached
# with, so go back to the first page whenever either of them changes.
_search_signature = (query.strip(), top_n)
if st.session_state.get("search_signature") != _search_signature:
    st.session_state.search_signature = _search_signature
    st.session_state.page = 1


def _go_to_page(page_number):
    """Button callback: store the page to show on the rerun caused by the click."""
    st.session_state.page = page_number


def _card_html(row):
    """Return the HTML card for one listing row, with every value escaped.

    The markup is built without indentation or blank lines so that Markdown
    does not interpret part of it as an indented code block.
    """
    def esc(value):
        # Values come from the CSV; escape them before injecting into raw HTML.
        return html.escape(str(value), quote=True)

    title = esc(row['content'])
    year = esc(row['Année-Modèle'])
    price = esc(row['Prix'])
    city = esc(row['Ville'])
    mileage = esc(row['Kilométrage'])

    # Only absolute http(s) URLs are rendered as links: this rejects
    # javascript:-style URLs and missing values.
    link = str(row['Lien']).strip()
    link_html = ""
    if link.lower().startswith(("http://", "https://")):
        link_html = (
            f'<a href="{esc(link)}" target="_blank" rel="noopener noreferrer" style="'
            'display: block; margin: 10px auto 0 auto; background-color: #4CAF50; '
            'color: white; padding: 5px 10px; text-align: center; '
            'text-decoration: none; border-radius: 5px;">View Details</a>'
        )

    return (
        '<div style="border: 1px solid green; border-radius: 10px; padding: 10px; '
        'background-color: #f9f9f9; text-align: left; height: auto; margin-bottom: 20px;">'
        f'<h5>{title}</h5>'
        f'<p><strong>Année-Modèle:</strong> {year}</p>'
        f'<p><strong>Price:</strong> {price} MAD</p>'
        f'<p><strong>City:</strong> {city}</p>'
        f'<p><strong>Kilométrage:</strong> {mileage} km</p>'
        f'{link_html}'
        '</div>'
    )


# Search and display
search_clicked = st.button("Search")
if query.strip():
    results, similarities = search(query)
    total_results = len(results)

    if total_results == 0:
        st.info("No matching cars found.")
    else:
        results_per_page = top_n
        # Ceiling division: a partially filled last page still counts.
        total_pages = -(-total_results // results_per_page)

        # Keep the stored page inside the valid range.
        current_page = min(max(1, st.session_state.page), total_pages)
        st.session_state.page = current_page

        # Paginate results
        start_idx = (current_page - 1) * results_per_page
        end_idx = start_idx + results_per_page
        paginated_results = results.iloc[start_idx:end_idx]
        st.write(f"Showing results {start_idx + 1}-{min(end_idx, total_results)} of {total_results} (Page {st.session_state.page}/{total_pages}):")

        # Display cards in rows using Streamlit's `st.columns()`
        for i, (_, row) in enumerate(paginated_results.iterrows()):
            if i % 3 == 0:  # Create a new row every 3 cards
                cols = st.columns(3)  # 3 cards per row

            # Use the appropriate column in the row
            with cols[i % 3]:
                st.markdown(_card_html(row), unsafe_allow_html=True)

        # Pagination controls. The page change is done in on_click callbacks,
        # which Streamlit runs before re-executing the script, so the results
        # above are already rendered for the new page on the same click.
        st.write("Navigation:")
        col1, _, col3 = st.columns(3)
        with col1:
            st.button("Previous", on_click=_go_to_page, args=(max(1, current_page - 1),))
        with col3:
            st.button("Next", on_click=_go_to_page, args=(min(total_pages, current_page + 1),))
elif search_clicked:
    # The button was pressed without anything to search for.
    st.warning("Please enter a search query.")