Spaces:
Running
Running
alonsosilva
committed on
Commit
•
a3cec77
1
Parent(s):
07b634d
Add app
Browse files- Dockerfile +25 -0
- app.py +234 -0
- requirements.txt +7 -0
- test_kuzudb/.lock +0 -0
- test_kuzudb/.shadow +0 -0
- test_kuzudb/.wal +0 -0
- test_kuzudb/catalog.kz +0 -0
- test_kuzudb/data.kz +0 -0
- test_kuzudb/metadata.kz +0 -0
- test_kuzudb/n-0.hindex +0 -0
- test_kuzudb/n-0.hindex.ovf +0 -0
- test_kuzudb/n-1.hindex +0 -0
- test_kuzudb/n-1.hindex.ovf +0 -0
- test_kuzudb/n-2.hindex +0 -0
- test_lancedb/Songs.lance/_transactions/0-792a2d61-740e-4405-ac65-9bfd39f8045e.txn +1 -0
- test_lancedb/Songs.lance/_transactions/1-11446c4d-5b76-4653-b0b5-0e515412fdcb.txn +0 -0
- test_lancedb/Songs.lance/_versions/1.manifest +0 -0
- test_lancedb/Songs.lance/_versions/2.manifest +0 -0
- test_lancedb/Songs.lance/data/535b0fe7-3004-4639-a5c4-303489599295.lance +0 -0
Dockerfile
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11
|
2 |
+
|
3 |
+
# Set up a new user named "user" with user ID 1000 for permission
|
4 |
+
RUN useradd -m -u 1000 user
|
5 |
+
# Switch to the "user" user
|
6 |
+
USER user
|
7 |
+
# Set home to the user's home directory
|
8 |
+
ENV HOME=/home/user \
|
9 |
+
PATH=/home/user/.local/bin:$PATH
|
10 |
+
|
11 |
+
# Upgrade pip
|
12 |
+
RUN pip install --no-cache-dir --upgrade pip
|
13 |
+
|
14 |
+
COPY --chown=user requirements.txt requirements.txt
|
15 |
+
|
16 |
+
# Install requirements
|
17 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
18 |
+
|
19 |
+
COPY --chown=user app.py app.py
|
20 |
+
|
21 |
+
COPY --chown=user test_lancedb/ test_lancedb/
|
22 |
+
|
23 |
+
COPY --chown=user test_kuzudb/ test_kuzudb/
|
24 |
+
|
25 |
+
ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from dotenv import find_dotenv, load_dotenv
|
2 |
+
# _ = load_dotenv(find_dotenv())
|
3 |
+
|
4 |
+
import solara
|
5 |
+
|
6 |
+
import polars as pl
|
7 |
+
|
8 |
+
df = pl.read_csv(
|
9 |
+
"https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
|
10 |
+
)
|
11 |
+
|
12 |
+
import string
|
13 |
+
|
14 |
+
df = df.with_columns(
|
15 |
+
pl.Series("Album", [string.capwords(album) for album in df["Album"]])
|
16 |
+
)
|
17 |
+
df = df.with_columns(pl.Series("Song", [string.capwords(song) for song in df["Song"]]))
|
18 |
+
df = df.with_columns(pl.col("Lyrics").fill_null("None"))
|
19 |
+
|
20 |
+
df = df.with_columns(
|
21 |
+
text=pl.lit("# ")
|
22 |
+
+ pl.col("Album")
|
23 |
+
+ pl.lit(": ")
|
24 |
+
+ pl.col("Song")
|
25 |
+
+ pl.lit("\n\n")
|
26 |
+
+ pl.col("Lyrics")
|
27 |
+
)
|
28 |
+
|
29 |
+
import shutil
|
30 |
+
import lancedb
|
31 |
+
|
32 |
+
shutil.rmtree("test_lancedb", ignore_errors=True)
|
33 |
+
db = lancedb.connect("test_lancedb")
|
34 |
+
|
35 |
+
from lancedb.embeddings import get_registry
|
36 |
+
|
37 |
+
embeddings = (
|
38 |
+
get_registry()
|
39 |
+
.get("sentence-transformers")
|
40 |
+
.create(name="TaylorAI/gte-tiny", device="cpu")
|
41 |
+
)
|
42 |
+
|
43 |
+
from lancedb.pydantic import LanceModel, Vector
|
44 |
+
|
45 |
+
|
46 |
+
class Songs(LanceModel):
    """LanceDB schema for the ``Songs`` table.

    ``text`` is declared as the source field of the module-level
    ``embeddings`` function and ``vector`` as its vector field, sized with
    ``embeddings.ndims()``.
    """

    Song: str
    Lyrics: str
    Album: str
    Artist: str
    # Source text for the embedding function.
    text: str = embeddings.SourceField()
    # Embedding vector field; its dimension comes from the embedding function.
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()
|
53 |
+
|
54 |
+
table = db.create_table("Songs", schema=Songs)
|
55 |
+
table.add(data=df)
|
56 |
+
|
57 |
+
import os
|
58 |
+
from typing import Optional
|
59 |
+
|
60 |
+
from langchain_community.chat_models import ChatOpenAI
|
61 |
+
|
62 |
+
class ChatOpenRouter(ChatOpenAI):
    """``ChatOpenAI`` subclass whose API base defaults to OpenRouter.

    Args:
        model_name: Model identifier forwarded to ``ChatOpenAI``.
        openai_api_key: API key to use. When omitted (``None``), the value of
            the ``OPENROUTER_API_KEY`` environment variable is used instead.
        openai_api_base: Base URL of the API endpoint.
        **kwargs: Forwarded unchanged to ``ChatOpenAI.__init__``.
    """

    openai_api_base: str
    openai_api_key: str
    model_name: str

    def __init__(
        self,
        model_name: str,
        openai_api_key: Optional[str] = None,
        openai_api_base: str = "https://openrouter.ai/api/v1",
        **kwargs,
    ):
        # Honour an explicitly supplied key; previously the argument was
        # always overwritten by the environment variable and thus ignored.
        if openai_api_key is None:
            openai_api_key = os.getenv("OPENROUTER_API_KEY")
        super().__init__(
            openai_api_base=openai_api_base,
            openai_api_key=openai_api_key,
            model_name=model_name,
            **kwargs,
        )
|
81 |
+
|
82 |
+
llm_openrouter = ChatOpenRouter(model_name="meta-llama/llama-3.1-405b-instruct", temperature=0.1)
|
83 |
+
|
84 |
+
def get_relevant_texts(query, table=table):
    """Return the ``text`` of the best matches for *query* as one string.

    Args:
        query: Search input passed to ``table.search``.
        table: Table to search; defaults to the module-level ``table``.

    Returns:
        The ``text`` value of each returned row (at most five), each followed
        by a ``---`` separator, joined with single spaces.
    """
    results = table.search(query).limit(5).to_polars()
    # Iterate over the rows actually returned instead of indexing 0..4, so a
    # search yielding fewer than five rows no longer raises an index error.
    return " ".join(text + "\n\n---\n\n" for text in results["text"])
|
91 |
+
|
92 |
+
def generate_prompt(query, table=table):
    """Build the context-grounded prompt for *query*.

    The prompt is a fixed instruction line, the texts returned by
    ``get_relevant_texts(query, table)``, and finally the question itself.
    """
    context = get_relevant_texts(query, table)
    pieces = (
        "Answer the question based only on the following context:\n\n",
        context,
        "\n\nQuestion: ",
        query,
    )
    return "".join(pieces)
|
99 |
+
|
100 |
+
def generate_response(query, table=table):
    """Return the content of the LLM reply to the prompt built for *query*."""
    return llm_openrouter.invoke(input=generate_prompt(query, table)).content
|
104 |
+
|
105 |
+
import kuzu
|
106 |
+
|
107 |
+
shutil.rmtree("test_kuzudb", ignore_errors=True)
|
108 |
+
db = kuzu.Database("test_kuzudb")
|
109 |
+
conn = kuzu.Connection(db)
|
110 |
+
# Create schema
|
111 |
+
conn.execute("CREATE NODE TABLE ARTIST(name STRING, PRIMARY KEY (name))")
|
112 |
+
conn.execute("CREATE NODE TABLE ALBUM(name STRING, PRIMARY KEY (name))")
|
113 |
+
conn.execute("CREATE NODE TABLE SONG(ID SERIAL, name STRING, lyrics STRING, PRIMARY KEY(ID))")
|
114 |
+
conn.execute("CREATE REL TABLE IN_ALBUM(FROM SONG TO ALBUM)")
|
115 |
+
conn.execute("CREATE REL TABLE FROM_ARTIST(FROM ALBUM TO ARTIST)");
|
116 |
+
|
117 |
+
# Insert nodes
|
118 |
+
# Values are bound as query parameters rather than spliced into the Cypher
# text, so a quote character inside an artist, album or song name can neither
# break the statement nor change its meaning.
for artist in df["Artist"].unique():
    conn.execute("CREATE (artist:ARTIST {name: $name})", {"name": artist})

for album in df["Album"].unique():
    conn.execute("CREATE (album:ALBUM {name: $name})", {"name": album})

for song, lyrics in df.select(["Song", "text"]).unique().rows():
    # The double-quote -> single-quote normalisation is no longer needed for
    # escaping; it is kept so the stored lyrics stay the same as before.
    conn.execute(
        "CREATE (song:SONG {name: $name, lyrics: $lyrics})",
        {"name": song, "lyrics": lyrics.replace('"', "'")},
    )

# Insert edges
for song, album, lyrics in df.select(["Song", "Album", "text"]).rows():
    # Songs are matched on name AND (normalised) lyrics, mirroring how the
    # SONG nodes were created above.
    conn.execute(
        """
        MATCH (song:SONG), (album:ALBUM)
        WHERE song.name = $song AND song.lyrics = $lyrics AND album.name = $album
        CREATE (song)-[:IN_ALBUM]->(album)
        """,
        {"song": song, "lyrics": lyrics.replace('"', "'"), "album": album},
    )

for album, artist in df.select(["Album", "Artist"]).unique().rows():
    conn.execute(
        """
        MATCH (album:ALBUM), (artist:ARTIST)
        WHERE album.name = $album AND artist.name = $artist
        CREATE (album)-[:FROM_ARTIST]->(artist)
        """,
        {"album": album, "artist": artist},
    )
|
148 |
+
|
149 |
+
response = conn.execute(
|
150 |
+
"""
|
151 |
+
MATCH (a:ALBUM {name: 'The Black Album'})<-[:IN_ALBUM]-(s:SONG) RETURN s.name
|
152 |
+
"""
|
153 |
+
)
|
154 |
+
|
155 |
+
df_response = response.get_as_pl()
|
156 |
+
|
157 |
+
from langchain_community.graphs import KuzuGraph
|
158 |
+
|
159 |
+
graph = KuzuGraph(db)
|
160 |
+
|
161 |
+
def generate_kuzu_prompt(user_query):
    """Build the text-to-Cypher prompt for *user_query*.

    The prompt is assembled from fixed generation rules, the schema text read
    from the module-level ``graph`` object, one worked example, and finally
    the user's question.
    """
    rules_and_schema_header = """Task: Generate Kùzu Cypher statement to query a graph database.

Instructions:
Generate the Kùzu dialect of Cypher with the following rules in mind:
1. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`.
2. Do not include triple backticks ``` in your response. Return only Cypher.
3. Do not return any notes or comments in your response.


Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:\n"""
    example_and_question_header = """\nExample:
The question is:\n"Which songs does the load album have?"
MATCH (a:ALBUM {name: 'Load'})<-[:IN_ALBUM]-(s:SONG) RETURN s.name

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:\n"""
    return (
        rules_and_schema_header
        + graph.get_schema
        + example_and_question_header
        + user_query
    )
|
183 |
+
|
184 |
+
|
185 |
+
def generate_final_prompt(query, cypher_query, col_name, _values):
    """Build the answer-phrasing prompt from a query result.

    Args:
        query: The user's natural-language question.
        cypher_query: The Cypher statement that produced the result.
        col_name: Name of the result column being reported.
        _values: Values of that column.

    Returns:
        The prompt text: fixed instructions with one worked example,
        followed by the query, the ``[col_name: values]`` information line
        and the question.
    """
    template = """You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
Here is an example:

Question: Which managers own Neo4j stocks?
Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC]
Helpful Answer: CTL LLC, JANE STREET GROUP LLC owns Neo4j stocks.

Follow this example when generating answers.
If the provided information is empty, say that you don't know the answer.
Query:\n{cypher_query}
Information:
[{col_name}: {_values}]

Question: {query}
Helpful Answer:
"""
    # Braces inside the substituted values are not re-interpreted by
    # str.format, so Cypher such as ``{name: 'Load'}`` passes through intact.
    return template.format(
        cypher_query=cypher_query,
        col_name=col_name,
        _values=_values,
        query=query,
    )
|
205 |
+
|
206 |
+
def generate_kg_response(query):
    """Answer *query* by generating and running Cypher on the graph.

    The LLM first turns the question into a Cypher statement, which is run on
    the module-level ``conn``. The first column of the result is then handed
    back to the LLM, together with the question, to phrase the final answer.

    Args:
        query: The user's natural-language question.

    Returns:
        A ``(final_answer, cypher_query)`` tuple of strings.
    """
    cypher_query = llm_openrouter.invoke(input=generate_kuzu_prompt(query)).content
    # NOTE(review): the model-generated statement is executed verbatim, so it
    # could also modify the graph — consider restricting it to read-only
    # queries before exposing this publicly.
    response = conn.execute(cypher_query)
    result = response.get_as_pl()
    if result.columns:
        # Only the first result column is reported to the answering prompt.
        col_name = result.columns[0]
        _values = result[col_name].to_list()
    else:
        # A statement that returns nothing has no columns; pass empty
        # information (the final prompt tells the model to say it does not
        # know) instead of failing with IndexError.
        col_name, _values = "", []
    final_prompt = generate_final_prompt(query, cypher_query, col_name, _values)
    final_response = llm_openrouter.invoke(input=final_prompt).content
    return final_response, cypher_query
|
222 |
+
|
223 |
+
query = solara.reactive("How many songs does the black album have?")
|
224 |
+
@solara.component
def Page():
    """Render the query box and, for a non-empty query, the answer and Cypher."""
    with solara.Column(margin=10):
        solara.Markdown("# Metallica Song Finder graph-only")
        solara.InputText("Enter some query:", query, continuous_update=False)
        question = query.value
        if question != "":
            answer, generated_cypher = generate_kg_response(question)
            # Heading/body pairs, rendered in order.
            for markdown_text in (
                "## Answer:",
                answer,
                "## Cypher query:",
                generated_cypher,
            ):
                solara.Markdown(markdown_text)
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
solara==1.39.0
|
2 |
+
polars==1.7.1
|
3 |
+
lancedb==0.13.0
|
4 |
+
sentence-transformers==3.1.1
|
5 |
+
langchain-community==0.3.0
|
6 |
+
openai==1.47.1
|
7 |
+
kuzu==0.6.0
|
test_kuzudb/.lock
ADDED
File without changes
|
test_kuzudb/.shadow
ADDED
File without changes
|
test_kuzudb/.wal
ADDED
Binary file (191 kB). View file
|
|
test_kuzudb/catalog.kz
ADDED
Binary file (76 Bytes). View file
|
|
test_kuzudb/data.kz
ADDED
File without changes
|
test_kuzudb/metadata.kz
ADDED
File without changes
|
test_kuzudb/n-0.hindex
ADDED
File without changes
|
test_kuzudb/n-0.hindex.ovf
ADDED
File without changes
|
test_kuzudb/n-1.hindex
ADDED
File without changes
|
test_kuzudb/n-1.hindex.ovf
ADDED
File without changes
|
test_kuzudb/n-2.hindex
ADDED
File without changes
|
test_lancedb/Songs.lance/_transactions/0-792a2d61-740e-4405-ac65-9bfd39f8045e.txn
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
$792a2d61-740e-4405-ac65-9bfd39f8045e��Song ���������*string8Lyrics ���������*string8Album ���������*string8Artist ���������*string8text ���������*string82vector ���������*fixed_size_list:float:3848
|
test_lancedb/Songs.lance/_transactions/1-11446c4d-5b76-4653-b0b5-0e515412fdcb.txn
ADDED
Binary file (104 Bytes). View file
|
|
test_lancedb/Songs.lance/_versions/1.manifest
ADDED
Binary file (616 Bytes). View file
|
|
test_lancedb/Songs.lance/_versions/2.manifest
ADDED
Binary file (676 Bytes). View file
|
|
test_lancedb/Songs.lance/data/535b0fe7-3004-4639-a5c4-303489599295.lance
ADDED
Binary file (521 kB). View file
|
|