|
from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
import os

app = Flask(__name__)

# llama.cpp settings: n_gpu_layers=0 keeps inference on the CPU; raise it to
# offload layers when running a GPU build of llama-cpp-python.
n_gpu_layers = 0
n_batch = 1024

model_path = "Phi-3-mini-4k-instruct-q4.gguf"

llm = LlamaCpp(
    model_path=model_path,
    temperature=0.1,
    max_tokens=256,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096,  # Phi-3-mini-4k supports a 4,096-token context window
)

model_stats = os.stat(model_path)
print("Model size:", model_stats.st_size, "bytes")


@app.route('/', methods=['POST'])
def get_skills():
    cv_body = request.json.get('cv_body')
    if not cv_body:
        return jsonify({'error': 'No cv_body provided'}), 400

    # Phi-3 chat template: the question belongs in the user turn; the
    # assistant turn is left open for the model to complete.
    output = llm.invoke(
        f"<|user|>\n{cv_body}\n\nCan you list the skills mentioned in the CV?<|end|>\n<|assistant|>",
        stop=["<|end|>"],
    )

    return jsonify({'skills': output})


if __name__ == '__main__':
    app.run()
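# A quick smoke test for the endpoint above (a sketch: it assumes the server
# is running locally on Flask's default port 5000; the CV text is invented
# for illustration):
#
#   curl -X POST http://127.0.0.1:5000/ \
#        -H "Content-Type: application/json" \
#        -d '{"cv_body": "Python developer with Flask, Docker and SQL experience."}'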
|
# ---------------------------------------------------------------------------
# Second, standalone script: ranking text similarity with Word2Vec embeddings.
# Run it separately from the Phi-3 service above.
# ---------------------------------------------------------------------------

from flask import Flask, request, jsonify
import nltk
from gensim.models import Word2Vec
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib
matplotlib.use('Agg')  # headless backend, so plots can render inside a server worker
import matplotlib.pyplot as plt
import io
import base64

# nltk.word_tokenize needs the 'punkt' tokenizer models
nltk.download('punkt')

app = Flask(__name__)

# A toy corpus to compare against; in practice these would be real documents.
texts = [
    "This is a sample text.",
    "Another example of text.",
    "More texts to compare."
]

tokenized_texts = [nltk.word_tokenize(text.lower()) for text in texts]

# Train a small Word2Vec model on the corpus itself. min_count=1 keeps every
# word, which is only sensible for a corpus this tiny.
word_embeddings_model = Word2Vec(sentences=tokenized_texts, vector_size=100, window=5, min_count=1, workers=4)
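# A quick way to sanity-check the trained vectors (a sketch; neighbours are
# noisy on a three-sentence corpus):
#
#   word_embeddings_model.wv['text']                       # 100-d vector for "text"
#   word_embeddings_model.wv.most_similar('text', topn=3)  # nearest words by cosine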
|
|
|
def text_embedding(text):
    """Embed a text as the mean of its words' Word2Vec vectors."""
    words = nltk.word_tokenize(text.lower())
    embeddings = [word_embeddings_model.wv[word] for word in words if word in word_embeddings_model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    # no word is in the vocabulary: fall back to a zero vector
    return np.zeros(word_embeddings_model.vector_size)
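# For example (a sketch; exact values depend on Word2Vec's random init):
#
#   vec = text_embedding("a sample text")
#   vec.shape  # -> (100,), i.e. vector_size above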
|
|
|
@app.route('/process', methods=['POST'])
def process():
    data = request.get_json()
    input_text = data.get('input_text', '')

    if not input_text:
        return jsonify({'error': 'No input text provided'}), 400

    # Embed the input and every corpus text, then score cosine similarity,
    # expressed as percentages.
    input_embedding = text_embedding(input_text)
    text_embeddings = [text_embedding(text) for text in texts]

    similarities = cosine_similarity([input_embedding], text_embeddings).flatten()
    similarities_percentages = [similarity * 100 for similarity in similarities]
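    # cosine_similarity computes sim(u, v) = u.v / (|u||v|), which lies in
    # [-1, 1] for these real-valued embeddings, so a "percentage" can in
    # principle be negative. For two vectors it is equivalent to:
    #
    #   np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))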
|
|
|
    # Bar chart of the scores, rendered to an in-memory PNG and base64-encoded
    # so it can travel inside the JSON response.
    fig, ax = plt.subplots(figsize=(10, 6))
    texts_for_plotting = [f"Text {i+1}" for i in range(len(texts))]
    ax.bar(texts_for_plotting, similarities_percentages)
    ax.set_ylabel('Similarity (%)')
    ax.set_xlabel('Texts')
    ax.set_title('Similarity of the input text to the corpus texts')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode('utf-8')
    plt.close(fig)

    # Top three most similar corpus texts, highest score first.
    sorted_indices = np.argsort(similarities)[::-1]
    similar_texts = [(similarities[idx] * 100, texts[idx]) for idx in sorted_indices[:3]]

    response = {
        'similarities': similarities_percentages,
        'plot': img_base64,
        'most_similar_texts': similar_texts
    }

    return jsonify(response)
|
|
|
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080, debug=True)
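# To exercise the endpoint (a sketch: assumes the service is running on port
# 8080 as configured above; the input text is invented):
#
#   curl -X POST http://127.0.0.1:8080/process \
#        -H "Content-Type: application/json" \
#        -d '{"input_text": "A sample text about examples."}'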
|
|