awacke1 committed
Commit 85064b1
Parent: ff0ccdb

Update app.py

Files changed (1):
  1. app.py (+12, -69)
app.py CHANGED
@@ -16,14 +16,12 @@ import pandas as pd
 
 # -------------------------------------------- For Memory - you will need to set up a dataset and HF_TOKEN ---------
 UseMemory=True
-
 if UseMemory:
     DATASET_REPO_URL="https://huggingface.co/datasets/awacke1/ChatbotMemory.csv"
     DATASET_REPO_ID="awacke1/ChatbotMemory.csv"
     DATA_FILENAME="ChatbotMemory.csv"
     DATA_FILE=os.path.join("data", DATA_FILENAME)
     HF_TOKEN=os.environ.get("HF_TOKEN")
-
 if UseMemory:
     try:
         hf_hub_download(
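The setup above assumes a dataset repo the Space can clone and push to. A hedged sketch of that pattern with huggingface_hub follows; the argument names come from huggingface_hub's documented API rather than this diff, and the try/except shape mirrors the truncated try: block above (note the Repository class is deprecated in newer huggingface_hub releases in favor of HfApi):

    import os
    from huggingface_hub import hf_hub_download, Repository

    DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/ChatbotMemory.csv"
    HF_TOKEN = os.environ.get("HF_TOKEN")  # Space secret; required to push commits

    try:
        # Warm start: fetch the current memory CSV from the dataset repo.
        hf_hub_download(
            repo_id="awacke1/ChatbotMemory.csv",
            filename="ChatbotMemory.csv",
            repo_type="dataset",
        )
    except Exception:
        pass  # first run: the file may not exist yet

    # Clone the dataset repo locally so repo.push_to_hub() can commit new rows.
    repo = Repository(
        local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
    )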
@@ -38,60 +36,10 @@ if UseMemory:
             local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
         )
 
-async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
-    URL = f"https://datasets-server.huggingface.co/splits?dataset={dataset_name}"
-    async with httpx.AsyncClient() as session:
-        response = await session.get(URL)
-        return response.json()
-
-async def get_valid_datasets() -> Dict[str, List[str]]:
-    URL = f"https://datasets-server.huggingface.co/valid"
-    async with httpx.AsyncClient() as session:
-        response = await session.get(URL)
-        datasets = response.json()["valid"]
-    return gr.Dropdown.update(choices=datasets, value="kelm")
-    # The one to watch: https://huggingface.co/rungalileo
-    # rungalileo/medical_transcription_40
-
-async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
-    URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
-    async with httpx.AsyncClient() as session:
-        response = await session.get(URL)
-    print(URL)
-    gr.Markdown(URL)
-    return response.json()
-
-def get_df_from_rows(api_output):
-    return pd.DataFrame([row["row"] for row in api_output["rows"]])
-
-async def update_configs(dataset_name: str):
-    splits = await get_splits(dataset_name)
-    all_configs = sorted(set([s["config"] for s in splits["splits"]]))
-    return (gr.Dropdown.update(choices=all_configs, value=all_configs[0]),
-            splits)
-
-async def update_splits(config_name: str, state: gr.State):
-    splits_for_config = sorted(set([s["split"] for s in state["splits"] if s["config"] == config_name]))
-    dataset_name = state["splits"][0]["dataset"]
-    dataset = await update_dataset(splits_for_config[0], config_name, dataset_name)
-    return (gr.Dropdown.update(choices=splits_for_config, value=splits_for_config[0]), dataset)
-
-async def update_dataset(split_name: str, config_name: str, dataset_name: str):
-    rows = await get_first_rows(dataset_name, config_name, split_name)
-    df = get_df_from_rows(rows)
-    return df
-
-# Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
-async def update_URL(dataset: str, config: str, split: str) -> str:
-    URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
-    URL = f"https://huggingface.co/datasets/{split}"
-    return (URL)
-
-async def openurl(URL: str) -> str:
-    html = f"<a href={URL} target=_blank>{URL}</a>"
-    return (html)
-
+def get_df(name: str):
+    dataset = load_dataset(name, split="train")
+    return dataset
+
 def store_message(name: str, message: str):
     if name and message:
         with open(DATA_FILE, "a") as csvfile:
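The new get_df above swaps the deleted datasets-server REST helpers for a single load_dataset call (note the added line originally passed the str builtin where the name parameter was clearly intended). A short usage sketch, assuming the datasets library is installed and the repo holds one CSV:

    from datasets import load_dataset

    # Load the memory CSV straight from the Hub as its "train" split.
    memory = load_dataset("awacke1/ChatbotMemory.csv", split="train")
    print(memory)            # a datasets.Dataset; columns like 'time', 'message', 'name'
    df = memory.to_pandas()  # hand off to pandas for display or filtering
    print(df.tail())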
@@ -99,15 +47,17 @@ def store_message(name: str, message: str):
             writer.writerow(
                 {"time": str(datetime.now()), "message": message.strip(), "name": name.strip() }
             )
-        # Uncomment the line below to begin saving. If you create your own copy, add an access token called "HF_TOKEN" to your profile, then add it as a secret named "HF_TOKEN" on your repo. For the CSV, copy the header and first few rows to your own file, then update the paths above to save to your own dataset repository.
         commit_url = repo.push_to_hub()
-    return ""
 
+        f=get_df(DATASET_REPO_ID)
+        print(f)
+    return ""
+# ----------------------------------------------- For Memory
+
 mname = "facebook/blenderbot-400M-distill"
 model = BlenderbotForConditionalGeneration.from_pretrained(mname)
 tokenizer = BlenderbotTokenizer.from_pretrained(mname)
 
-
 def take_last_tokens(inputs, note_history, history):
     """Filter the last 128 tokens"""
     if inputs['input_ids'].shape[1] > 128:
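The store_message hunk never shows where writer comes from; the lines elided by the diff presumably build a csv.DictWriter over the three columns. A hedged reconstruction of the whole function as it stands after this commit (the DictWriter line is an assumption, not part of the diff):

    import csv
    from datetime import datetime

    def store_message(name: str, message: str):
        if name and message:
            with open(DATA_FILE, "a") as csvfile:
                # Assumed: fieldnames match the CSV header already in the repo.
                writer = csv.DictWriter(csvfile, fieldnames=["time", "message", "name"])
                writer.writerow(
                    {"time": str(datetime.now()), "message": message.strip(), "name": name.strip()}
                )
            # Each call is a full git commit + push: slow, but every message is durable.
            commit_url = repo.push_to_hub()
            f = get_df(DATASET_REPO_ID)  # re-read the dataset to confirm the write
            print(f)
        return ""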
@@ -123,12 +73,10 @@ def add_note_to_history(note, note_history):
     note_history = '</s> <s>'.join(note_history)
     return [note_history]
 
-
 title = "💬ChatBack🧠💾"
 description = """Chatbot with a persistent-memory dataset, allowing a multi-agent AI system to use a shared dataset as a memory pool of stored interactions.
 Current best SOTA chatbot: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F """
 
-
 def chat(message, history):
     history = history or []
     if history:
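For orientation: chat() below feeds Blenderbot the entire conversation as one string whose turns are joined by '</s> <s>', which is why take_last_tokens caps the encoding at 128 tokens. A standalone sketch of that round trip (the example history is invented):

    from transformers import BlenderbotForConditionalGeneration, BlenderbotTokenizer

    mname = "facebook/blenderbot-400M-distill"
    model = BlenderbotForConditionalGeneration.from_pretrained(mname)
    tokenizer = BlenderbotTokenizer.from_pretrained(mname)

    # One turn: encode the joined history, generate, decode the reply.
    history_useful = ["Hello!</s> <s>Hi there. How are you?</s> <s>Doing great, you?"]
    inputs = tokenizer(history_useful, return_tensors="pt")
    reply_ids = model.generate(**inputs)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
    print(response)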
@@ -142,23 +90,18 @@ def chat(message, history):
     response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
     history_useful = add_note_to_history(response, history_useful)
     list_history = history_useful[0].split('</s> <s>')
-    history.append((list_history[-2], list_history[-1]))
-
-    store_message(message, response)  # Save to dataset; requires the dataset repo and HF_TOKEN secret set up above.
-
+    history.append((list_history[-2], list_history[-1]))
+    ret = store_message(message, response)  # Save to dataset; requires the dataset repo and HF_TOKEN secret set up above.
     return history, history
 
-
 gr.Interface(
     fn=chat,
     theme="huggingface",
     css=".footer {display:none !important}",
     inputs=["text", "state"],
-    outputs=["chatbot", "state"],
+    outputs=["chatbot", "state", "text"],
     title=title,
     allow_flagging="never",
-
     description="Gradio chatbot backed by memory in a dataset repository.",
     article=f"The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) 🦃Thanks!🦃 Check out HF Datasets: https://huggingface.co/spaces/awacke1/FreddysDatasetViewer SOTA papers, code, and datasets on chat are here: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest"
-
 ).launch(debug=True)
 
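One caveat: this commit declares three outputs ("chatbot", "state", "text") while chat() still returns only two values, so Gradio will complain about the mismatched return count. A minimal sketch of the matching shape, with a stubbed response in place of the model call (the stub is not the app's code):

    import gradio as gr

    def chat(message, history):
        history = history or []
        response = "stub reply"       # stand-in for the Blenderbot response
        history.append((message, response))
        ret = ""                      # stand-in for store_message()'s return value
        return history, history, ret  # one value per declared output

    gr.Interface(
        fn=chat,
        inputs=["text", "state"],
        outputs=["chatbot", "state", "text"],
        allow_flagging="never",
    ).launch()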