Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import json | |
| import numpy as np | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
| import time | |
| from concurrent.futures import ThreadPoolExecutor | |
def process_string(s):
    """Normalise an ingredient name before it is embedded.

    The text is lower-cased and every ``&`` is replaced by the word
    ``and``; nothing else is altered.
    """
    lowered = s.lower()
    return lowered.replace('&', 'and')
# NOTE(review): the model is rebuilt on every call, with no caching in place.
# If the installed Streamlit version provides a resource cache decorator,
# applying it here would avoid repeated loads -- confirm before enabling.
def load_model():
    """Instantiate the sentence-embedding model from its local directory."""
    model_path = r"finetiuned_model"
    return SentenceTransformer(model_path)
def process_embedding(ingredient, model):
    """Embed a single ingredient name with ``model``.

    The name is first normalised with ``process_string`` and then passed to
    ``model.encode`` as a one-element batch. The encoder output is returned
    after conversion with ``.tolist()``.
    """
    batch = [process_string(ingredient)]
    encoded = model.encode(batch)
    return encoded.tolist()
def faiss_query(xq, index, top_k=1):
    """Search ``index`` for the ``top_k`` nearest entries to ``xq``.

    ``xq`` is converted to a float32 NumPy array before being handed to
    ``index.search``. Only the first row of each result array is returned,
    i.e. the hits belonging to the first query vector.

    Returns:
        A ``(distances, indices)`` pair for that first row.
    """
    query = np.array(xq).astype('float32')
    all_distances, all_indices = index.search(query, top_k)
    first_distances = all_distances[0]
    first_indices = all_indices[0]
    return first_distances, first_indices
def get_top_matches(ingredients_flat, ingredients, loaded_model, index):
    """Find the closest catalogue entry for each input ingredient.

    Args:
        ingredients_flat: Sequence of catalogue ingredient names; it is
            indexed with the row ids returned by ``faiss_query``.
        ingredients: Iterable of raw ingredient names to match.
        loaded_model: Encoder forwarded to ``process_embedding``.
        index: Search index forwarded to ``faiss_query``.

    Returns:
        A ``(matches, scores)`` pair of lists. An ingredient for which the
        index returns no usable neighbour is skipped, so both lists may be
        shorter than ``ingredients``.
    """
    with ThreadPoolExecutor() as executor:
        # Stage 1: embed every ingredient in parallel.
        embeddings = list(
            executor.map(
                lambda ing: process_embedding(ing, loaded_model), ingredients
            )
        )
        # Stage 2: query the index in parallel, one embedding per task.
        results = list(
            executor.map(lambda xq: faiss_query(xq, index), embeddings)
        )

    matches = []
    scores = []
    for distances, indices in results:
        # FAISS pads missing neighbours with label -1; indexing the catalogue
        # with it would silently return the *last* entry as a bogus match.
        if indices.size == 0 or indices[0] < 0:
            continue
        matches.append(ingredients_flat[int(indices[0])])
        # Plain Python float keeps the rounded score's type and repr stable
        # regardless of the NumPy scalar type the index returns.
        # NOTE(review): 1 - d/2 presumably maps a squared-L2 distance between
        # unit vectors onto a cosine-like score -- confirm the index metric.
        scores.append(round(1 - float(distances[0]) / 2, 2))
    return matches, scores
# Load the Faiss index from disk.
index = faiss.read_index('faiss_index.bin')

# Load the metadata from the JSON file. The encoding is given explicitly so
# the read does not depend on the platform's default text encoding.
with open('metadata_faiss.json', 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Catalogue names in metadata order; get_top_matches indexes this list with
# the row ids returned by the index search.
ingredients_flat = [item["Ingredient"] for item in metadata]

loaded_model = load_model()
def _parse_menu_items(raw_json):
    """Parse ``raw_json`` and check that it is a JSON array of objects.

    Raises:
        json.JSONDecodeError: If ``raw_json`` is not valid JSON.
        ValueError: If the decoded value is not a list of dicts.
    """
    menu_items = json.loads(raw_json)
    if not isinstance(menu_items, list) or not all(
        isinstance(item, dict) for item in menu_items
    ):
        raise ValueError(
            "Expected a JSON array of menu item objects. "
            "Please check and try again."
        )
    return menu_items


def main():
    """Render the page and match ingredients from user-supplied JSON.

    The user pastes a JSON array of menu items. When "Process" is clicked,
    each item's ``"ingredients"`` list is matched with ``get_top_matches``;
    the matches and scores are written back onto the item, and the result is
    shown as a DataFrame together with the elapsed processing time.
    """
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    json_input = st.text_area("")

    if not st.button("Process"):
        return

    start_time = time.time()
    with st.spinner("Processing..."):
        try:
            input_data = _parse_menu_items(json_input)
        except json.JSONDecodeError:
            st.error("Invalid JSON input. Please check and try again.")
        except ValueError as err:
            # Valid JSON with the wrong shape (e.g. a top-level object).
            st.error(str(err))
        else:
            for menu_item in input_data:
                # A missing key or an explicit null both mean "no ingredients".
                ing_list = menu_item.get("ingredients") or []
                matches, scores = get_top_matches(
                    ingredients_flat, ing_list, loaded_model, index
                )
                # Key spelling is kept as-is: it is the output column name.
                menu_item["Ingradients_matched"] = matches
                menu_item["scores"] = scores
            output_df = pd.DataFrame(input_data)
            st.write("Processed Data:")
            st.write(output_df)
    end_time = time.time()
    st.write(f"Processing time: {end_time - start_time:.2f} seconds")


if __name__ == "__main__":
    main()