test2text / app /backend /chunks_processing.py
Davide Panza
Upload 56 files
1d8ed3b verified
import random
import streamlit as st
def query_collection(collection, query='', nresults=3, context_multiplier=2, sim_th=None):
    """Fetch the documents of a collection that best match ``query``.

    The collection is asked for ``nresults * context_multiplier`` results and
    only the first entry of the returned ``'documents'`` field is used.

    Args:
        collection: Object exposing ``query(query_texts=..., n_results=...)``
            that returns a mapping with ``'documents'`` (and ``'distances'``
            when filtering). Presumably a ChromaDB collection -- TODO confirm.
        query: Text forwarded as ``query_texts``.
        nresults: Base number of results to request.
        context_multiplier: Factor applied to ``nresults`` when querying.
        sim_th: Optional similarity threshold. Similarity is computed as
            ``1 - distance``; documents scoring below it are dropped.

    Returns:
        When ``sim_th`` is ``None``: the list of retrieved documents.
        Otherwise: a single string made of the retained documents joined
        with no separator (empty string if none pass the threshold).
    """
    response = collection.query(
        query_texts=query,
        n_results=nresults * context_multiplier,
    )
    documents = response.get('documents')[0]

    # No threshold requested: hand back the raw list of documents.
    if sim_th is None:
        return documents

    # Convert each distance into a similarity score before filtering.
    scores = [1 - distance for distance in response.get("distances")[0]]

    kept = []
    for document, score in zip(documents, scores):
        if score >= sim_th:
            kept.append(document)

    # NOTE: this branch returns one concatenated string, not a list.
    return ''.join(kept)
def get_chapter_context(chapters, chapter_number, n_questions):
    """Pick random chunks of one chapter and store them in the session state.

    Up to ``n_questions`` distinct chunks are sampled from
    ``chapters[chapter_number]['chunks']`` and written to
    ``st.session_state['chapter_selected_chunks']``. Nothing is returned.

    Args:
        chapters: Container indexed by ``chapter_number``; each entry is
            expected to be a mapping with a ``'chunks'`` sequence.
        chapter_number: Index/key of the chapter to sample from.
        n_questions: Maximum number of chunks to select; capped at the
            number of chunks available in the chapter.

    Raises:
        ValueError: If the chapter entry is ``None``, lacks a ``'chunks'``
            key, or has an empty ``'chunks'`` sequence.
        IndexError / KeyError: Propagated unchanged from the
            ``chapters[chapter_number]`` lookup when the chapter is absent.
    """
    chapter = chapters[chapter_number]

    # Validate before touching the chapter: a None entry must yield the
    # ValueError below rather than an AttributeError.
    if chapter is None:
        raise ValueError(f"Chapter {chapter_number} not found in the chapters list.")
    if 'chunks' not in chapter:
        raise ValueError(f"Chapter {chapter_number} does not contain 'chunks' key.")

    chunks = chapter['chunks']
    n_chunks = len(chunks)
    if n_chunks == 0:
        raise ValueError(f"Chapter {chapter_number} has no chunks to process.")

    # Sample without replacement, never asking for more chunks than exist.
    chunks_indices = random.sample(range(n_chunks), min(n_questions, n_chunks))
    st.session_state['chapter_selected_chunks'] = [chunks[i] for i in chunks_indices]