File size: 2,747 Bytes
59fb831 438c423 27a6856 8e36e4a de96056 27a6856 59fb831 2482865 59fb831 2482865 d7c2b1e 2482865 d7c2b1e 2482865 d7c2b1e 2482865 27a6856 2482865 27a6856 2482865 27a6856 2482865 84389e5 59fb831 2482865 5aa7983 2482865 5aa7983 59fb831 5aa7983 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import streamlit as st
import difflib
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Download NLTK stopwords if not already done
nltk.download('stopwords')

# Read the course catalogue
lpi_df = pd.read_csv('Learning Pathway Index.csv')

# Give the verbose spreadsheet headers short, identifier-friendly names
lpi_df.rename(columns={
    "Course / Learning material": "Course_Learning_Material",
    "Course Level": "Course_Level",
    "Type (Free or Paid)": "Type",
    "Module / Sub-module \nDifficulty level": "Difficulty_Level",
    "Keywords / Tags / Skills / Interests / Categories": "Keywords"
}, inplace=True)

# Columns whose text is joined (in this order) into one document per course
feature_columns = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords',
]

# A single missing cell would turn the whole concatenation for that row into
# NaN, and the text preprocessing further down only accepts strings. Blank
# cells are therefore treated as empty text; lpi_df's own columns are left
# untouched.
feature_text = lpi_df[feature_columns].fillna('').astype(str)
combined = feature_text[feature_columns[0]]
for column in feature_columns[1:]:
    combined = combined + ' ' + feature_text[column]
lpi_df['combined_features'] = combined

# Text preprocessing
combined_features = lpi_df['combined_features']
porter_stemmer = PorterStemmer()
_english_stopwords = None


def _get_english_stopwords():
    """Return the English stopword set, loading it from NLTK on first use."""
    global _english_stopwords
    if _english_stopwords is None:
        _english_stopwords = frozenset(stopwords.words('english'))
    return _english_stopwords


def stemming(content):
    """Normalise *content* into a space-separated string of stemmed words.

    Every character outside ``a-z``/``A-Z`` is replaced by a space, the text
    is lower-cased and split on whitespace, English stopwords are dropped and
    each remaining word is passed through the Porter stemmer.

    Args:
        content: The raw text to normalise; must be a ``str``.

    Returns:
        The stemmed words joined by single spaces (``''`` if none remain).

    Raises:
        TypeError: If *content* is not a string (raised by ``re.sub``).
    """
    words = re.sub(r'[^a-zA-Z]', ' ', content).lower().split()
    # One cached set lookup per word, instead of calling
    # stopwords.words('english') again for every single word.
    stop_words = _get_english_stopwords()
    return ' '.join(
        porter_stemmer.stem(word) for word in words if word not in stop_words
    )
# Run the text preprocessing over every course document
stemmed_documents = combined_features.apply(stemming)

# TF-IDF: learn the vocabulary and vectorise the documents in one call
vectorizer = TfidfVectorizer()
combined_features = vectorizer.fit_transform(stemmed_documents)

# Pairwise cosine similarity between all course vectors
similarity = cosine_similarity(combined_features)
# Streamlit app
st.title('Learning Pathway Index Course Recommendation')
user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    module_titles = lpi_df['Module']
    # difflib can only compare strings, so rows without a module title are
    # kept out of the candidate list.
    has_title = module_titles.notna()
    list_of_all_titles = module_titles[has_title].astype(str).tolist()
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

    if find_close_match:
        close_match = find_close_match[0]
        # Rows of `similarity` are addressed by position, so look up the
        # row *position* of the first course carrying the matched title
        # rather than its index label.
        is_match = (has_title & (module_titles.astype(str) == close_match)).to_numpy()
        index_of_the_course = int(np.flatnonzero(is_match)[0])

        # (row position, score) pairs, most similar first. The list always
        # has one entry per course, so it is never empty here.
        similarity_score = list(enumerate(similarity[index_of_the_course]))
        sorted_similar_course = sorted(similarity_score, key=lambda x: x[1], reverse=True)

        st.subheader('Courses suggested for you:')
        for i, (index, _score) in enumerate(sorted_similar_course[:30], start=1):
            title_from_index = module_titles.iloc[index]
            st.write(f"{i}. {title_from_index}")
    else:
        st.write('No close matches found.')
|