Spaces:

Enjoy0000
/

README

No application file

App Files Files Community

README / main.py

Yomna35

Upload 3 files

198e82e verified over 1 year ago

raw

history blame contribute delete

3.2 kB

	from flask import Flask, request, jsonify
	from langchain_community.llms import LlamaCpp
	import os
	app = Flask(__name__)

	n_gpu_layers = 0
	n_batch = 1024


	llm = LlamaCpp(
	model_path="Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
	temperature=0.1,
	n_gpu_layers=n_gpu_layers,
	n_batch=n_batch,
	verbose=True,
	n_ctx=4096
	)
	file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
	print("model size ====> :", file_size.st_size, "bytes")


	@app.route('/', methods=['POST'])
	def get_skills():
	cv_body = request.json.get('cv_body')

	# Simple inference example
	output = llm(
	f"<\|user\|>\n{cv_body}<\|end\|>\n<\|assistant\|>Can you list the skills mentioned in the CV?<\|end\|>",
	max_tokens=256, # Generate up to 256 tokens
	stop=["<\|end\|>"],
	echo=True, # Whether to echo the prompt
	)

	return jsonify({'skills': output})

	if __name__ == '__main__':
	app.run()
	from flask import Flask, request, jsonify
	import nltk
	from gensim.models import Word2Vec
	import numpy as np
	from sklearn.metrics.pairwise import cosine_similarity
	import matplotlib.pyplot as plt
	import io
	import base64

	nltk.download('punkt')

	app = Flask(__name__)

	texts = [
	"This is a sample text.",
	"Another example of text.",
	"More texts to compare."
	]

	tokenized_texts = [nltk.word_tokenize(text.lower()) for text in texts]

	word_embeddings_model = Word2Vec(sentences=tokenized_texts, vector_size=100, window=5, min_count=1, workers=4)

	def text_embedding(text):
	words = nltk.word_tokenize(text.lower())
	embeddings = [word_embeddings_model.wv[word] for word in words if word in word_embeddings_model.wv]
	if embeddings:
	return np.mean(embeddings, axis=0)
	else:
	return np.zeros(word_embeddings_model.vector_size)

	@app.route('/process', methods=['POST'])
	def process():
	data = request.get_json()
	input_text = data.get('input_text', '')

	if not input_text:
	return jsonify({'error': 'No input text provided'}), 400

	input_embedding = text_embedding(input_text)
	text_embeddings = [text_embedding(text) for text in texts]

	similarities = cosine_similarity([input_embedding], text_embeddings).flatten()
	similarities_percentages = [similarity * 100 for similarity in similarities]

	fig, ax = plt.subplots(figsize=(10, 6))
	texts_for_plotting = [f"Text {i+1}" for i in range(len(texts))]
	ax.bar(texts_for_plotting, similarities_percentages)
	ax.set_ylabel('Similarity (%)')
	ax.set_xlabel('Texts')
	ax.set_title('Similarity of Input Text with other texts')
	plt.xticks(rotation=45, ha='right')
	plt.tight_layout()

	buf = io.BytesIO()
	plt.savefig(buf, format='png')
	buf.seek(0)
	img_base64 = base64.b64encode(buf.read()).decode('utf-8')
	plt.close()

	sorted_indices = np.argsort(similarities)[::-1]
	similar_texts = [(similarities[idx] * 100, texts[idx]) for idx in sorted_indices[:3]]

	response = {
	'similarities': similarities_percentages,
	'plot': img_base64,
	'most_similar_texts': similar_texts
	}

	return jsonify(response)

	if __name__ == '__main__':
	app.run(host='0.0.0.0', port=8080, debug=True)