Spaces:
Sleeping
Sleeping
HonestAnnie
committed on
Commit
•
331b253
1
Parent(s):
92ed022
Jetzt läuft alles!!
Browse files
app.py
CHANGED
@@ -4,6 +4,12 @@ import chromadb
|
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
import spaces
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
@spaces.GPU
|
8 |
def get_embeddings(queries, task):
|
9 |
model = SentenceTransformer("Linq-AI-Research/Linq-Embed-Mistral", use_auth_token=os.getenv("HF_TOKEN"))
|
@@ -11,59 +17,45 @@ def get_embeddings(queries, task):
|
|
11 |
query_embeddings = model.encode(prompts)
|
12 |
return query_embeddings
|
13 |
|
14 |
-
# Initialize a persistent Chroma client and retrieve collection
|
15 |
-
client = chromadb.PersistentClient(path="./chroma")
|
16 |
-
collection_de = client.get_collection(name="phil_de")
|
17 |
-
collection_en = client.get_collection(name="phil_en")
|
18 |
-
authors_list_de = ["Ludwig Wittgenstein", "Sigmund Freud", "Marcus Aurelius", "Friedrich Nietzsche", "Epiktet", "Ernst Jünger", "Georg Christoph Lichtenberg", "Balthasar Gracian", "Hannah Arendt", "Erich Fromm", "Albert Camus"]
|
19 |
-
authors_list_en = ["Friedrich Nietzsche", "Joscha Bach"]
|
20 |
-
|
21 |
def query_chroma(collection, embedding, authors):
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
where=where_filter,
|
29 |
-
include=["documents", "metadatas", "distances"]
|
30 |
-
)
|
31 |
-
|
32 |
-
ids = results.get('ids', [[]])[0]
|
33 |
-
metadatas = results.get('metadatas', [[]])[0]
|
34 |
-
documents = results.get('documents', [[]])[0]
|
35 |
-
distances = results.get('distances', [[]])[0]
|
36 |
-
|
37 |
-
formatted_results = []
|
38 |
-
for id_, metadata, document_text, distance in zip(ids, metadatas, documents, distances):
|
39 |
-
result_dict = {
|
40 |
-
"id": id_,
|
41 |
-
"author": metadata.get('author', 'Unknown author'),
|
42 |
-
"book": metadata.get('book', 'Unknown book'),
|
43 |
-
"section": metadata.get('section', 'Unknown section'),
|
44 |
-
"title": metadata.get('title', 'Untitled'),
|
45 |
-
"text": document_text,
|
46 |
-
"distance": distance
|
47 |
-
}
|
48 |
-
formatted_results.append(result_dict)
|
49 |
-
|
50 |
-
return formatted_results
|
51 |
-
except Exception as e:
|
52 |
-
return [{"error": str(e)}]
|
53 |
|
54 |
-
|
55 |
-
|
|
|
|
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
|
|
58 |
|
59 |
with gr.Blocks(css=".custom-markdown { border: 1px solid #ccc; padding: 10px; border-radius: 5px; }") as demo:
|
60 |
-
gr.Markdown("Enter your query, filter authors (default is all), click **Search** to search.")
|
61 |
-
database_inp = gr.Dropdown(label="Database", choices=["
|
62 |
author_inp = gr.Dropdown(label="Authors", choices=authors_list_de, multiselect=True)
|
63 |
-
inp = gr.Textbox(label="Query", placeholder="
|
64 |
btn = gr.Button("Search")
|
65 |
results = gr.State()
|
66 |
|
|
|
|
|
|
|
67 |
database_inp.change(
|
68 |
fn=lambda database: update_authors(database),
|
69 |
inputs=[database_inp],
|
@@ -91,8 +83,12 @@ with gr.Blocks(css=".custom-markdown { border: 1px solid #ccc; padding: 10px; bo
|
|
91 |
def display_accordion(data):
|
92 |
for query, res in data:
|
93 |
with gr.Accordion(query, open=False) as acc:
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
97 |
|
98 |
demo.launch()
|
|
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
import spaces
|
6 |
|
7 |
+
# Open the Chroma store under ./chroma and look up the two collections
# that are queried by this app.
client = chromadb.PersistentClient(path="./chroma")
collection_de = client.get_collection(name="phil_de")
collection_en = client.get_collection(name="phil_en")

# Author names offered as choices in the "Authors" dropdown.
authors_list_de = [
    "Ludwig Wittgenstein",
    "Sigmund Freud",
    "Marcus Aurelius",
    "Friedrich Nietzsche",
    "Epiktet",
    "Ernst Jünger",
    "Georg Christoph Lichtenberg",
    "Balthasar Gracian",
    "Hannah Arendt",
    "Erich Fromm",
    "Albert Camus",
]
authors_list_en = [
    "Friedrich Nietzsche",
    "Joscha Bach",
]
|
12 |
+
|
13 |
@spaces.GPU
|
14 |
def get_embeddings(queries, task):
|
15 |
model = SentenceTransformer("Linq-AI-Research/Linq-Embed-Mistral", use_auth_token=os.getenv("HF_TOKEN"))
|
|
|
17 |
query_embeddings = model.encode(prompts)
|
18 |
return query_embeddings
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def query_chroma(collection, embedding, authors):
    """Return the ten nearest documents for a single query embedding.

    Args:
        collection: Object exposing a Chroma-style ``query`` method.
        embedding: Query vector; must provide ``tolist()``.
        authors: Author names to restrict the search to. An empty (or
            ``None``) value means no author filter is applied.

    Returns:
        A list of dicts with the keys ``id``, ``author``, ``book``,
        ``section``, ``title``, ``text`` and ``distance``, in the order the
        collection returned them.
    """
    # Use ``None`` (not ``{}``) for "no filter": some chromadb releases
    # validate every non-None ``where`` and reject an empty dict.
    where_filter = {"author": {"$in": authors}} if authors else None

    results = collection.query(
        query_embeddings=[embedding.tolist()],
        n_results=10,
        where=where_filter,
        include=["documents", "metadatas", "distances"],
    )

    # Results are nested one list per query embedding; only one was sent.
    ids = results.get('ids', [[]])[0]
    metadatas = results.get('metadatas', [[]])[0]
    documents = results.get('documents', [[]])[0]
    distances = results.get('distances', [[]])[0]

    formatted_results = []
    for id_, metadata, document_text, distance in zip(ids, metadatas, documents, distances):
        # A record stored without metadata may come back as None.
        metadata = metadata or {}
        formatted_results.append({
            "id": id_,
            "author": metadata.get('author', 'Unknown author'),
            "book": metadata.get('book', 'Unknown book'),
            "section": metadata.get('section', 'Unknown section'),
            "title": metadata.get('title', 'Untitled'),
            "text": document_text,
            "distance": distance,
        })

    return formatted_results
|
47 |
|
48 |
with gr.Blocks(css=".custom-markdown { border: 1px solid #ccc; padding: 10px; border-radius: 5px; }") as demo:
|
49 |
+
gr.Markdown("Enter your query, filter authors (default is all), click **Search** to search. Delimit multiple queries with semicola; since there is a search-quota for each user (based on IP) it makes sense to query in batches (if you enjoy querying that is).")
|
50 |
+
database_inp = gr.Dropdown(label="Database", choices=["German", "English"], value="German")
|
51 |
author_inp = gr.Dropdown(label="Authors", choices=authors_list_de, multiselect=True)
|
52 |
+
inp = gr.Textbox(label="Query", placeholder="Wie kann ich gesund leben und bedeutet Gesundheit für jeden das gleiche?; Why is life so difficult and aren't there any shortcuts?")
|
53 |
btn = gr.Button("Search")
|
54 |
results = gr.State()
|
55 |
|
56 |
+
def update_authors(database):
    """Return a Gradio update that swaps the author choices for *database*.

    "German" selects the German author list; any other value selects the
    English one.
    """
    if database == "German":
        author_choices = authors_list_de
    else:
        author_choices = authors_list_en
    return gr.update(choices=author_choices)
|
58 |
+
|
59 |
database_inp.change(
|
60 |
fn=lambda database: update_authors(database),
|
61 |
inputs=[database_inp],
|
|
|
83 |
def display_accordion(data):
    """Render one collapsed accordion per query, with a Markdown card per result.

    Args:
        data: Iterable of ``(query, results)`` pairs. Each result is read via
            ``.get`` for the keys ``author``, ``book`` and ``text``.
    """
    for query, res in data:
        with gr.Accordion(query, open=False):
            for result in res:
                with gr.Column():
                    author, book = (
                        result.get('author', 'Unknown author'),
                        result.get('book', 'Unknown book'),
                    )
                    text = result.get('text')
                    # Bold "author, book" heading followed by the passage text.
                    gr.Markdown(
                        value=f"**{author}, {book}**\n\n{text}",
                        elem_classes="custom-markdown",
                    )
|
93 |
|
94 |
demo.launch()
|