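"""Streamlit app that recommends courses from the Learning Pathway Index.

It builds TF-IDF vectors over the combined course metadata, computes pairwise
cosine similarity, and lists the modules closest to the user's query.
Run it with `streamlit run <path-to-this-file>`.
"""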
import streamlit as st
import difflib
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download the NLTK stopword list (a no-op if it is already installed)
nltk.download('stopwords', quiet=True)

# Read the data
lpi_df = pd.read_csv('Learning Pathway Index.csv')

# Rename columns
lpi_df.rename(columns={
    "Course / Learning material": "Course_Learning_Material",
    "Course Level": "Course_Level",
    "Type (Free or Paid)": "Type",
    "Module / Sub-module \nDifficulty level": "Difficulty_Level",
    "Keywords / Tags / Skills / Interests / Categories": "Keywords"
}, inplace=True)

# Combine the descriptive columns into one text field; fillna('') keeps a missing
# value in any single column from turning the whole combined string into NaN
feature_columns = ['Course_Learning_Material', 'Source', 'Course_Level', 'Type', 'Module', 'Difficulty_Level', 'Keywords']
lpi_df['combined_features'] = lpi_df[feature_columns].fillna('').astype(str).agg(' '.join, axis=1)

# Text preprocessing
combined_features = lpi_df['combined_features']
porter_stemmer = PorterStemmer()
english_stopwords = set(stopwords.words('english'))  # build the set once rather than per word

def stemming(content):
    # Keep letters only, lowercase, drop stopwords, and stem the remaining words
    words = re.sub('[^a-zA-Z]', ' ', content).lower().split()
    stemmed_words = [porter_stemmer.stem(word) for word in words if word not in english_stopwords]
    return ' '.join(stemmed_words)

combined_features = combined_features.apply(stemming)

# TF-IDF and similarity
vectorizer = TfidfVectorizer()
vectorizer.fit(combined_features)
combined_features = vectorizer.transform(combined_features)
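# Pairwise cosine similarity over the TF-IDF vectors: an n_courses x n_courses matrix
# whose row i holds the similarity of course i to every other course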
similarity = cosine_similarity(combined_features)

# Streamlit app
st.title('Learning Pathway Index Course Recommendation')
user_input = st.text_input('Enter what you want to learn:')

if user_input:
    list_of_all_titles = lpi_df['Module'].tolist()
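    # get_close_matches returns up to 3 titles with a SequenceMatcher ratio of at least 0.6 (its defaults)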
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

    if find_close_match:
        close_match = find_close_match[0]
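        # Find the row index of the matched module, then rank every course by its similarity to that row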
        index_of_the_course = lpi_df[lpi_df.Module == close_match].index.values[0]
        similarity_score = list(enumerate(similarity[index_of_the_course]))
        sorted_similar_course = sorted(similarity_score, key=lambda x: x[1], reverse=True)

        st.subheader('Courses suggested for you:')
        # The top entry is the matched course itself (similarity 1.0); the rest are its nearest neighbours
        for i, course in enumerate(sorted_similar_course[:30], start=1):
            index = course[0]
            title_from_index = lpi_df.loc[index, 'Module']
            st.write(f"{i}. {title_from_index}")
    else:
        st.write('No close matches found.')