Rams901 committed on
Commit
33a6d1c
1 Parent(s): 9ded2b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -50,6 +50,7 @@ def retrieve_thoughts(query, n):
50
  df = pd.DataFrame([dict(doc[0])['metadata'] for doc in docs_with_score], )
51
  df = pd.concat((df, pd.DataFrame([dict(doc[0])['page_content'] for doc in docs_with_score], columns = ['page_content'])), axis = 1)
52
  df = pd.concat((df, pd.DataFrame([doc[1] for doc in docs_with_score], columns = ['score'])), axis = 1)
 
53
 
54
  # TO-DO: What if user query doesn't match what we provide as documents
55
 
@@ -58,10 +59,10 @@ def retrieve_thoughts(query, n):
58
  # tier_2 = df[(df['score'] < 0.95) * (df["score"] > 0.7)]
59
 
60
 
61
- chunks_1 = tier_1.groupby(['title', 'url']).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
62
  tier_1_adjusted = tier_1.groupby(['title', 'url']).first().reset_index()[['title', 'url', 'score']]
63
  tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1 )
64
- tier_1_adjusted['content'] = chunks_1
65
 
66
  # chunks_2 = tier_2.groupby(['title', 'url', '_id']).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
67
  # tier_2_adjusted = tier_2.groupby(['title', 'url', '_id']).first().reset_index()[['_id', 'title', 'url']]
@@ -97,7 +98,7 @@ def qa_retrieve(query, llm):
97
  tier_1 = thoughts['tier 1']
98
  # tier_2 = thoughts['tier 2']
99
 
100
- reference = tier_1[['ref', 'url', 'title', 'content','score']].to_dict('records')
101
 
102
  # tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n Content: {x.content}", axis = 1).values)
103
  # print(len(tier_1))
 
50
  df = pd.DataFrame([dict(doc[0])['metadata'] for doc in docs_with_score], )
51
  df = pd.concat((df, pd.DataFrame([dict(doc[0])['page_content'] for doc in docs_with_score], columns = ['page_content'])), axis = 1)
52
  df = pd.concat((df, pd.DataFrame([doc[1] for doc in docs_with_score], columns = ['score'])), axis = 1)
53
+ df.sort_values("score", inplace = True)
54
 
55
  # TO-DO: What if user query doesn't match what we provide as documents
56
 
 
59
  # tier_2 = df[(df['score'] < 0.95) * (df["score"] > 0.7)]
60
 
61
 
62
+ chunks_1 = tier_1.groupby(['title', 'url', ]).apply(lambda x: {f"chunk_{i}": row for i, row in enumerate(x.sort_values('id')[['score','page_content']].to_dict('records'))}).values
63
  tier_1_adjusted = tier_1.groupby(['title', 'url']).first().reset_index()[['title', 'url', 'score']]
64
  tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1 )
65
+ tier_1_adjusted['chunks'] = chunks_1
66
 
67
  # chunks_2 = tier_2.groupby(['title', 'url', '_id']).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
68
  # tier_2_adjusted = tier_2.groupby(['title', 'url', '_id']).first().reset_index()[['_id', 'title', 'url']]
 
98
  tier_1 = thoughts['tier 1']
99
  # tier_2 = thoughts['tier 2']
100
 
101
+ reference = tier_1[['ref', 'url', 'title', 'chunks']].to_dict('records')
102
 
103
  # tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n Content: {x.content}", axis = 1).values)
104
  # print(len(tier_1))