Spaces:
Runtime error
Runtime error
rituthombre
commited on
Commit
•
1f8ee4d
0
Parent(s):
Duplicate from RituJim/QNim
Browse files- .gitattributes +34 -0
- README.md +13 -0
- __pycache__/nim_game_env.cpython-37.pyc +0 -0
- __pycache__/nim_gpt_functions.cpython-37.pyc +0 -0
- __pycache__/qnim.cpython-37.pyc +0 -0
- app.py +154 -0
- nim_game_env.py +91 -0
- nim_gpt_functions.py +88 -0
- qnim.py +239 -0
- requirements.txt +7 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: QNim
|
3 |
+
emoji: 🦀
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: purple
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.16.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: RituJim/QNim
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
__pycache__/nim_game_env.cpython-37.pyc
ADDED
Binary file (3.8 kB). View file
|
|
__pycache__/nim_gpt_functions.cpython-37.pyc
ADDED
Binary file (3.94 kB). View file
|
|
__pycache__/qnim.cpython-37.pyc
ADDED
Binary file (4.44 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import datetime
|
3 |
+
from nim_game_env import NimGameEnv
|
4 |
+
from nim_gpt_functions import plan_gpt_move, plan_qc_move, execute_move
|
5 |
+
|
6 |
+
TEMPERATURE_DEFAULT = 0.5
|
7 |
+
PILES_DEFAULT = [3, 5, 7]
|
8 |
+
HUMAN_STR = "Human"
|
9 |
+
PLAY_AGAINST_QC = "QC"
|
10 |
+
PLAY_AGAINST_GPT = "AI"
|
11 |
+
|
12 |
+
|
13 |
+
def reset_game(chat_history, nim_game_env):
    """Start a brand-new game and return the values that reset the UI.

    Both arguments are accepted only because the UI wires them in as
    inputs; their incoming values are discarded.

    Returns
    -------
    tuple
        (chatbot contents, history state, message textbox text,
        game-state HTML, fresh NimGameEnv). The first two items are the
        same empty list object.
    """
    fresh_env = NimGameEnv(PILES_DEFAULT)
    # reset() returns (text description, pile list); only the piles are drawn.
    _, starting_piles = fresh_env.reset()
    board_html = generate_game_state_ascii_art(starting_piles, False, 0, "")
    empty_history = []
    return empty_history, empty_history, "", board_html, fresh_env
|
20 |
+
|
21 |
+
|
22 |
+
def generate_game_state_ascii_art(piles, done, reward, player):
    """Render the board (or the game-over banner) as a ``<pre>`` HTML snippet.

    Parameters
    ----------
    piles : sequence of int
        Number of sticks in each pile. Any number of piles is supported;
        they are labelled "A", "B", "C", ... in order.
    done : bool
        When true the game-over banner is rendered instead of the piles.
    reward
        Unused; kept so existing callers keep working.
    player : str
        Name of the winner shown in the game-over banner.

    Returns
    -------
    str
        The rendered text wrapped in ``<pre>...</pre>``.
    """
    if done:
        body = "Game Over, " + player + " wins!"
    else:
        # One row per pile, one '|' per stick. Rows are joined with " \n"
        # so the three-pile output is unchanged from the original layout.
        body = " \n".join(
            f"Pile {chr(ord('A') + index)}: {'|' * sticks}"
            for index, sticks in enumerate(piles)
        )
    return "<pre>" + body + "</pre>"
|
30 |
+
|
31 |
+
|
32 |
+
def send_chat_msg(inp, chat_history, nim_game_env, temperature, openai_api_key, play_against):
    """Play one round: apply the human's move, then the opponent's reply.

    Parameters
    ----------
    inp : str
        The human's move in free text.
    chat_history : list or None
        Accumulated (human message, opponent message) pairs.
    nim_game_env : NimGameEnv
        The game environment for this session; it is stepped in place.
    temperature : float
        Sampling temperature used when the opponent is the GPT model.
    openai_api_key : str
        Key used for every LLM call (move parsing needs it even against QC).
    play_against : str
        PLAY_AGAINST_QC or PLAY_AGAINST_GPT.

    Returns
    -------
    tuple
        (chatbot contents, history state, game-state HTML or warning HTML).
    """
    if not openai_api_key:
        warning_msg = "<pre>Please paste your OpenAI API key (see https://beta.openai.com)</pre>"
        return chat_history, chat_history, warning_msg

    # Strip before the emptiness check so whitespace-only input is rejected
    # here instead of being sent to the LLM.
    inp = inp.strip() if inp else ""
    if not inp:
        warning_msg = "<pre>Please enter a move</pre>"
        return chat_history, chat_history, warning_msg

    chat_history = chat_history or []

    text_obs, observation, reward, done, info = execute_move(inp, nim_game_env, openai_api_key)

    if done:
        if reward == 1:
            # The human emptied the last pile.
            output = "Good game!"
            winner = HUMAN_STR
        else:
            # execute_move flags an unusable move as done with reward 0:
            # the human forfeits and the message ("Invalid move!") is shown.
            output = text_obs
            winner = play_against
    else:
        if play_against == PLAY_AGAINST_QC:
            output = plan_qc_move(text_obs)
        else:
            output = plan_gpt_move(text_obs, temperature, openai_api_key)

        text_obs, observation, reward, done, info = execute_move(output, nim_game_env, openai_api_key)
        # Mirror the human branch: if the opponent's move was rejected
        # (done without reward) the opponent forfeits, so the human wins.
        winner = HUMAN_STR if done and reward != 1 else play_against

    ascii_art = generate_game_state_ascii_art(observation, done, reward, winner)

    print("\n==== date/time: " + str(datetime.datetime.now() - datetime.timedelta(hours=5)) + " ====")
    print("inp: " + inp, ", output: ", output, ", observation: ", observation)

    chat_history.append((HUMAN_STR + ": " + inp, play_against + ": " + output))
    return chat_history, chat_history, ascii_art
|
69 |
+
|
70 |
+
|
71 |
+
def update_foo(widget, state):
    """Copy a widget's current value into its backing state.

    Parameters
    ----------
    widget
        The value delivered by the widget's change event.
    state
        The previously stored value.

    Returns
    -------
    The widget value, or the previous state when the widget delivered None.
    """
    # Compare against None rather than relying on truthiness: 0.0 is a
    # legitimate temperature and an emptied textbox should clear the stored
    # key, and both are falsy.
    if widget is not None:
        state = widget
    return state
|
75 |
+
|
76 |
+
|
77 |
+
def update_opponent(widget, state):
    """Return the widget value when it is truthy, otherwise the old state."""
    return widget if widget else state
|
81 |
+
|
82 |
+
|
83 |
+
# Gradio UI definition. NOTE(review): the nesting of rows/columns below was
# reconstructed from a flattened listing -- confirm against the real layout.
block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
with block as nim_game:
    # Per-session state holders.
    temperature_state = gr.State(TEMPERATURE_DEFAULT)
    openai_api_key_state = gr.State()
    history_state = gr.State()
    nim_game_env_state = gr.State(NimGameEnv(PILES_DEFAULT))
    play_against_state = gr.State(PLAY_AGAINST_QC)

    with gr.Row():
        # Shows the ASCII board, the game-over banner or a warning.
        game_state_html = gr.Markdown()

        with gr.Column():
            title = gr.Markdown("""<h3><center>QNimGPT</center></h3>""")
            subtitle = gr.Markdown("""<i><center>Play Nim against an IBM Quantum Computer simulator or OpenAI
GPT-3.5 AI. By <a href="https://github.com/ritu-thombre99">Ritu Thombre</a> and <a
href="https://github.com/JavaFXpert">James Weaver</a></center></i>""")

        with gr.Column():
            openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key",
                                                show_label=False, lines=1, type='password')
            play_against_radio = gr.Radio(label="Play against:", choices=[
                PLAY_AGAINST_QC, PLAY_AGAINST_GPT], value=PLAY_AGAINST_QC)
            play_against_radio.change(update_foo,
                                      inputs=[play_against_radio, play_against_state],
                                      outputs=[play_against_state])

    chatbot = gr.Chatbot()

    with gr.Row():
        message_tb = gr.Textbox(label="What's your move?",
                                placeholder="I'll take 2 sticks from pile A")
        send_btn = gr.Button(value="Send", variant="secondary").style(full_width=False)

    with gr.Row():
        gr.Examples(
            examples=["I'll take two sticks from pile A",
                      "Three sticks from the second pile",
                      "From pile C remove 2 sticks"],
            inputs=message_tb
        )
        reset_btn = gr.Button(value="Reset Game", variant="secondary").style(full_width=False)
        temperature_slider = gr.Slider(label="GPT Temperature", value=TEMPERATURE_DEFAULT, minimum=0.0, maximum=1.0,
                                       step=0.1)

    # The Send button and pressing Enter in the textbox trigger the same handler.
    move_inputs = [message_tb, history_state, nim_game_env_state, temperature_state,
                   openai_api_key_state, play_against_state]
    move_outputs = [chatbot, history_state, game_state_html]
    send_btn.click(send_chat_msg, inputs=move_inputs, outputs=move_outputs)
    message_tb.submit(send_chat_msg, inputs=move_inputs, outputs=move_outputs)

    # Resetting happens on demand and once when the page loads.
    reset_inputs = [history_state, nim_game_env_state]
    reset_outputs = [chatbot, history_state, message_tb, game_state_html, nim_game_env_state]
    reset_btn.click(reset_game, inputs=reset_inputs, outputs=reset_outputs)
    nim_game.load(reset_game, inputs=reset_inputs, outputs=reset_outputs)

    gr.Markdown("""<center>Each player may remove sticks from a pile on their turn.
Player to remove the last stick wins.
<a href="https://en.wikipedia.org/wiki/Nim" target="new">
Nim is one of the first-ever electronic computerized games</a>
</center>""")

    gr.HTML("<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>")

    openai_api_key_textbox.change(update_foo,
                                  inputs=[openai_api_key_textbox, openai_api_key_state],
                                  outputs=[openai_api_key_state])

    temperature_slider.change(update_foo,
                              inputs=[temperature_slider, temperature_state],
                              outputs=[temperature_state])

block.launch(debug=False)
|
nim_game_env.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC
|
2 |
+
|
3 |
+
import gymnasium as gym
|
4 |
+
from gymnasium import spaces
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
|
8 |
+
class NimGameEnv(gym.Env, ABC):
    """Custom environment for a simple Nim game.

    In this game, there are two players and a number of piles of stones.
    Each turn, a player can choose a pile and remove any number of stones from it.
    The move that empties the last pile earns reward 1, i.e. the player who
    takes the last stone wins.

    The observation space is a tuple of integers representing the number of stones in each pile.
    The action space is a tuple of two integers, representing the chosen pile and the number of stones to remove.
    """

    def __init__(self, starting_stick_piles=None):
        """Create the environment.

        Parameters
        ----------
        starting_stick_piles : sequence of int, optional
            Initial size of each pile; defaults to ``[3, 5, 7]``.
        """
        # None sentinel instead of a mutable default list shared across calls.
        if starting_stick_piles is None:
            starting_stick_piles = [3, 5, 7]
        # Private copy so later changes to the caller's list do not leak in.
        self.starting_stick_piles = list(starting_stick_piles)
        self.num_piles = len(self.starting_stick_piles)
        self.max_stones = max(self.starting_stick_piles)
        self.piles = self._init_piles()
        self.current_player = 0
        self.action_space = spaces.MultiDiscrete([self.num_piles, self.max_stones + 1])
        self.observation_space = spaces.MultiDiscrete([self.max_stones + 1] * self.num_piles)

    def step(self, action):
        """Take a step in the environment.

        Parameters
        ----------
        action: tuple
            The action taken by the player, represented as a tuple of the chosen pile and the number of stones to remove.

        Returns
        -------
        observation: list
            The current number of stones in each pile (the live internal list).
        reward: int
            1 if this move emptied the last pile, otherwise 0.
        done: bool
            Whether the game has ended.
        info: dict
            Always an empty dict.

        Raises
        ------
        ValueError
            If the action is not valid for the current piles.
        """
        # Validate the action
        if not self._is_valid_action(action):
            raise ValueError("Invalid action")

        # Update the piles
        pile, num_stones = action
        self.piles[pile] -= num_stones

        # Determine if the game has ended
        done = self._is_game_over()

        # Calculate the reward
        reward = self._calculate_reward()

        # Switch the current player
        self.current_player = (self.current_player + 1) % 2
        return self.piles, reward, done, {}

    def reset(self):
        """Reset the environment to the initial state.

        Returns
        -------
        tuple
            (text description of the piles, list of pile sizes).
        """
        self.piles = self._init_piles()
        self.current_player = 0
        text_observation = "The piles contain " + ", ".join(str(x) for x in self.piles) + " sticks."
        return text_observation, self.piles

    def _init_piles(self):
        """Return a fresh copy of the configured starting piles."""
        # Honour the constructor argument rather than a hard-coded [3, 5, 7].
        return list(self.starting_stick_piles)

    def _generate_random_stones(self):
        """Generate a random number of stones (between 1 and max_stones inclusive)."""
        return np.random.randint(1, self.max_stones + 1)

    def _is_valid_action(self, action):
        """Determine if an action is valid.

        The pile index must exist and the stone count must be positive and
        no larger than both max_stones and the chosen pile's current size.
        """
        pile, num_stones = action
        return 0 <= pile < self.num_piles and 0 < num_stones <= self.max_stones and num_stones <= self.piles[pile]

    def _is_game_over(self):
        """Determine if the game has ended (every pile is empty)."""
        return all(pile == 0 for pile in self.piles)

    def _calculate_reward(self):
        """Calculate the reward for the current step: 1 once the board is empty, else 0."""
        return 1 if self._is_game_over() else 0
|
nim_gpt_functions.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain import OpenAI
|
2 |
+
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
|
3 |
+
from langchain.chains import LLMChain
|
4 |
+
from qnim import get_quantum_move
|
5 |
+
import re
|
6 |
+
import inflect
|
7 |
+
# inflect engine used to spell out numbers ("two", "three", ...) in moves.
p = inflect.engine()

# Shared layout for a single few-shot example.
EXAMPLES_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["input", "output"],
    template="Input: {input}\nOutput: {output}"
)

# Few-shot examples mapping a textual game state to a move in plain English.
PLAN_MOVE_PROMPT_EXAMPLES = [
    {"input": "The piles contain 3, 5, 7 sticks", "output": "I'll take one stick from pile A"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take one stick from pile B"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take five sticks from pile B"},
    {"input": "The piles contain 1, 2, 3 sticks", "output": "I'll take two sticks from pile C"},
    {"input": "The piles contain 0, 2, 3 sticks", "output": "I'll take one stick from pile C"},
    {"input": "The piles contain 0, 2, 0 sticks", "output": "I'll take two sticks from pile B"},
]

# Prompt that asks the model to plan a move for {text_game_state}.
PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=PLAN_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Nim is a two-player game of strategy in which players take turns removing objects from separate piles. "
           "The goal of the game is to remove the last sticks from a pile when the other piles contain 0 sticks. Each "
           "of these inputs represent a game state. For each of these game states please express a logical move that "
           "consists of taking some number of sticks from a pile. "
           "You may not take any sticks from a pile that contains 0 sticks. "
           "You may not take more sticks from a pile than it contains. "
           "You may only take sticks from one pile. ",
    suffix="Input: {text_game_state}\nOutput:",
    input_variables=["text_game_state"],
    example_separator="\n\n"
)

# Few-shot examples mapping a plain-English move to "pile_index,stick_count".
EXEC_MOVE_PROMPT_EXAMPLES = [
    {"input": "I'll take two sticks from pile A", "output": "0,2"},
    {"input": "I'll take 3 sticks from the first pile", "output": "0,3"},
    {"input": "I'll take two sticks from pile C", "output": "2,2"},
    {"input": "I'll take one stick from the third pile", "output": "2,1"},
    {"input": "From pile B remove 2 sticks", "output": "1,2"},
    {"input": "I'll take the last stick from pile C", "output": "2,1"},
]

# Prompt that asks the model to translate {move_to_express} into that pair.
EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=EXEC_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Express every input as two numbers separated by a comma, where the first number is the zero index pile "
           "number and the second number is the number of sticks to remove.",
    suffix="Input: {move_to_express}\nOutput:",
    input_variables=["move_to_express"],
    example_separator="\n\n"
)
|
55 |
+
|
56 |
+
|
57 |
+
def plan_gpt_move(text_game_state, temperature, api_key):
    """Ask the OpenAI completion model to propose a move.

    Parameters
    ----------
    text_game_state : str
        Textual description of the piles, substituted into the planning prompt.
    temperature : float
        Sampling temperature passed to the model.
    api_key : str
        OpenAI API key.

    Returns
    -------
    str
        The model's reply with surrounding whitespace removed.
    """
    planner = LLMChain(
        llm=OpenAI(model_name='text-davinci-003', temperature=temperature, max_tokens=100,
                   openai_api_key=api_key),
        prompt=PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES,
        verbose=False,
    )
    raw_reply = planner.run({'text_game_state': text_game_state})
    return raw_reply.strip()
|
63 |
+
|
64 |
+
|
65 |
+
def plan_qc_move(text_game_state):
    """Let the quantum routine choose a move and phrase it in English.

    Parameters
    ----------
    text_game_state : str
        Text containing the pile sizes as standalone integers.

    Returns
    -------
    str
        A sentence such as "I'll take two sticks from pile A.".
    """
    pile_sizes = list(map(int, re.findall(r'\b\d+\b', text_game_state)))
    # An extra empty pile is appended before calling get_quantum_move
    # (presumably because that routine works on four piles -- verify in qnim).
    pile_sizes.append(0)
    pile_choice, amount = get_quantum_move(pile_sizes)
    noun = " sticks" if amount > 1 else " stick"
    # chr(pile_choice + 65) maps pile index 0, 1, 2 to "A", "B", "C".
    return "I'll take " + p.number_to_words(amount) + noun + " from pile " + chr(pile_choice+65) + "."
|
74 |
+
|
75 |
+
|
76 |
+
def execute_move(move_to_express, nim_game_env, api_key):
    """Translate a free-text move into an action and apply it to the game.

    Parameters
    ----------
    move_to_express : str
        The move in plain English.
    nim_game_env
        Environment whose ``step((pile_index, stick_count))`` is called.
    api_key : str
        OpenAI API key used for the translation call.

    Returns
    -------
    tuple
        (text observation, piles, reward, done, info). When the move cannot
        be parsed or is rejected by the environment the result is
        ``("Invalid move!", [0, 0, 0], 0, True, None)``.
    """
    llm = OpenAI(model_name='text-davinci-003', temperature=0.0, max_tokens=10,
                 openai_api_key=api_key)
    llm_chain = LLMChain(llm=llm, prompt=EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
    step_tuple_str = llm_chain.run({'move_to_express': move_to_express})
    try:
        # Parsing sits inside the try: a reply that is not "int,int" raises
        # ValueError from int() and must be reported as an invalid move
        # rather than crashing the request.
        step_tuple = tuple(int(x) for x in step_tuple_str.split(','))
        step_result = nim_game_env.step(step_tuple)
    except ValueError:
        return "Invalid move!", [0, 0, 0], 0, True, None

    text_observation = "The piles contain " + ", ".join(str(x) for x in step_result[0]) + " sticks."
    return text_observation, step_result[0], step_result[1], step_result[2], step_result[3]
|
qnim.py
ADDED
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from qiskit import BasicAer, QuantumCircuit, QuantumRegister, ClassicalRegister, execute
|
3 |
+
from qiskit import IBMQ
|
4 |
+
# provider = IBMQ.load_account()
|
5 |
+
|
6 |
+
|
7 |
+
# def misere_step(ones,piles):
|
8 |
+
# # even number of piles of 1 eg (1,1,3,0) or (0,0,3,0)
|
9 |
+
# if ones%2 == 0:
|
10 |
+
# objects_to_remove = []
|
11 |
+
# removable_amount = 1
|
12 |
+
# for i in range(len(piles)):
|
13 |
+
# if piles[i] > 1:
|
14 |
+
# objects_to_remove.append(piles[i]-1)
|
15 |
+
# else:
|
16 |
+
# objects_to_remove.append(0)
|
17 |
+
# # odd number of piles of 1 eg (1,1,3,1)
|
18 |
+
# else:
|
19 |
+
# objects_to_remove = []
|
20 |
+
# removable_amount = 1
|
21 |
+
# for i in range(len(piles)):
|
22 |
+
# if piles[i] > 1:
|
23 |
+
# objects_to_remove.append(piles[i])
|
24 |
+
# else:
|
25 |
+
# objects_to_remove.append(0)
|
26 |
+
# return objects_to_remove, removable_amount
|
27 |
+
|
28 |
+
def get_piles_to_remove(piles):
    """Compute, per pile, how many objects to take to zero the nim-sum.

    For each pile, the target size is ``size XOR nim_sum``. A pile is usable
    only when that target is smaller than its current size.

    Returns
    -------
    tuple
        (list with the amount to remove from each pile, 0 where the pile is
        not usable; number of usable piles).
    """
    nim_sum = 0
    for size in piles:
        nim_sum ^= size

    targets = [size ^ nim_sum for size in piles]
    objects_to_remove = [
        size - target if target < size else 0
        for size, target in zip(piles, targets)
    ]
    removable_amount = sum(1 for size, target in zip(piles, targets) if target < size)
    return objects_to_remove, removable_amount
|
42 |
+
|
43 |
+
|
44 |
+
def custom_qft(data_qubits):
    """Build a circuit of Hadamard and controlled-phase gates on ``data_qubits`` qubits.

    Qubits are processed from the highest index down: each gets a Hadamard
    followed by controlled-phase rotations of angle pi / 2**(target - control)
    from every lower-indexed qubit. No swap gates are added here.

    Returns
    -------
    QuantumCircuit
    """
    qr_data = QuantumRegister(data_qubits)
    qc = QuantumCircuit(qr_data)
    for target in range(data_qubits - 1, -1, -1):
        qc.h(qr_data[target])
        for control in range(target):
            qc.cp(np.pi/2**(target-control), qr_data[control], qr_data[target])
    return qc
|
55 |
+
|
56 |
+
def subroutine_add_const(data_qubits: int, const: int, to_gate=True):
    """Build a circuit applying phase ``const * pi / 2**i`` to qubit ``i``.

    Parameters
    ----------
    data_qubits : int
        Number of qubits in the circuit.
    const : int
        Constant that scales every phase angle.
    to_gate : bool
        When true the circuit is returned as a gate labelled " [const] ";
        otherwise the QuantumCircuit itself is returned.
    """
    adder = QuantumCircuit(data_qubits)
    for qubit_index in range(data_qubits):
        adder.p(const*np.pi/(2**qubit_index), qubit_index)
    if to_gate:
        return adder.to_gate(label=" ["+str(const)+"] ")
    return adder
|
62 |
+
|
63 |
+
def diffusion_operation(qc, address, flag, removable_pile):
    """Append the oracle and the reflection step on the address register.

    NOTE(review): this looks like a single Grover-style iteration (oracle
    marking the address of each removable pile, then a reflection) --
    confirm with the author.

    Parameters
    ----------
    qc : QuantumCircuit
        Circuit that is extended in place.
    address : QuantumRegister
        Three-qubit register encoding the pile number.
    flag : QuantumRegister
        Single-qubit target of the oracle's multi-controlled gates.
    removable_pile : list
        Per-pile amounts; a non-zero entry marks that pile in the oracle.
    """
    # For pile i the address value is i + 1 (001, 010, 011, 100). The listed
    # address bits are the ones X-flipped around the multi-controlled gate.
    flipped_bits = ((1, 2), (0, 2), (2,), (0, 1))

    def nim_oracle(qc, address, flag, removable_pile):
        for pile_index, bits in enumerate(flipped_bits):
            if removable_pile[pile_index] == 0:
                continue
            for bit in bits:
                qc.x(address[bit])
            qc.mct(address[:], flag)
            # Undo the flips in reverse order.
            for bit in reversed(bits):
                qc.x(address[bit])

    qc.x(flag)
    qc.h(flag)

    qc.h(address[:])
    nim_oracle(qc, address, flag, removable_pile)

    # H - X - (H, MCT, H on address[2]) - X - H sequence on the address register.
    qc.h(address[:])
    qc.x(address[:])
    qc.h(address[2])
    qc.mct(address[0:2], address[2])
    qc.h(address[2])
    qc.x(address[:])
    qc.h(address[:])
|
109 |
+
|
110 |
+
|
111 |
+
def qc_process(qc,objects_to_remove,address,flag,piles,removable_pile,removable_count):
    """Append the move-selection gates to ``qc``.

    Parameters
    ----------
    qc : QuantumCircuit
        Circuit extended in place.
    objects_to_remove : QuantumRegister
        Three-qubit register that receives the amount to remove.
    address : QuantumRegister
        Three-qubit register that receives the pile number (pile index + 1).
    flag : QuantumRegister
        Single qubit handed to diffusion_operation.
    piles : list of int
        Current pile sizes.
    removable_pile : list of int
        Amount to remove per pile (0 = not usable); modified in place when
        no pile is usable.
    removable_count : int
        Number of non-zero entries in ``removable_pile``.
    """
    # No preferred move: fall back to "take one from any non-empty pile".
    if removable_count == 0:
        for idx, _ in enumerate(removable_pile):
            if piles[idx] > 0:
                removable_pile[idx] = 1
                removable_count += 1

    # With four candidates, the first pile holding the minimum is dropped.
    if removable_count == 4:
        dropped = removable_pile.index(min(removable_pile))
        removable_pile[dropped] = 0
        removable_count -= 1

    qft_gate = custom_qft(3).to_gate()
    inverse_qft_gate = custom_qft(3).inverse().to_gate()

    if removable_count == 1:
        qc.swap(objects_to_remove[0],objects_to_remove[2])
        qc.append(qft_gate,objects_to_remove[:])

        # Address bits set to 1 for pile 0..3, i.e. binary 001, 010, 011, 100.
        address_bits = ((0,), (1,), (0, 1), (2,))
        add_gate = None
        for pile_index, bits in enumerate(address_bits):
            if removable_pile[pile_index] != 0:
                add_gate = subroutine_add_const(3,removable_pile[pile_index])
                for bit in bits:
                    qc.x(address[bit])
                break

        qc.append(add_gate,objects_to_remove[:])
        qc.append(inverse_qft_gate,objects_to_remove[:])
        qc.swap(objects_to_remove[0],objects_to_remove[2])
        return

    diffusion_operation(qc,address, flag, removable_pile)
    qc.swap(objects_to_remove[0],objects_to_remove[2])
    qc.append(qft_gate,objects_to_remove[:])
    for pile_index, remove_amount in enumerate(removable_pile):
        if remove_amount == 0:
            continue

        # Bits of (pile_index + 1), least-significant first, padded to 3.
        lsb_first = f"{pile_index + 1:03b}"[::-1]
        zero_positions = [pos for pos, bit in enumerate(lsb_first) if bit == '0']

        # Flip the zero bits so the 3-control gate fires on this address only.
        for pos in zero_positions:
            qc.x(address[pos])

        controlled_add_gate = subroutine_add_const(3,remove_amount).control(3)
        qc.append(controlled_add_gate,address[:]+objects_to_remove[:])

        for pos in zero_positions:
            qc.x(address[pos])

    qc.append(inverse_qft_gate,objects_to_remove[:])
    qc.swap(objects_to_remove[0],objects_to_remove[2])
|
176 |
+
|
177 |
+
def get_quantum_move(piles, backend=None):
    """Choose a Nim move by building, running and decoding a quantum circuit.

    Parameters
    ----------
    piles : list of int
        Current pile sizes (the circuit logic indexes four piles).
    backend : optional
        Qiskit backend to run on; defaults to BasicAer's 'qasm_simulator'.

    Returns
    -------
    tuple
        (zero-based pile index, number of objects to remove).
    """
    # Misere handling is disabled: the plain nim-sum strategy is always used.
    removable_pile, removable_count = get_piles_to_remove(piles)
    objects_to_remove = QuantumRegister(3,'piles')
    flag = QuantumRegister(1,'flag')
    output_piles = ClassicalRegister(3,'final_piles')
    address = QuantumRegister(3,'address')
    pick_pile = ClassicalRegister(3,'choose_pile')
    qc = QuantumCircuit(objects_to_remove,address,flag,output_piles,pick_pile)
    qc_process(qc,objects_to_remove,address,flag,piles,removable_pile,removable_count)

    qc.measure(address[:],pick_pile[:])
    qc.measure(objects_to_remove[:],output_piles[:])

    if backend is None:
        backend = BasicAer.get_backend('qasm_simulator')
    job = execute(qc,backend,shots=500)
    result = job.result()
    counts = result.get_counts()

    # Take the outcome with the highest count. max() over the keys returns the
    # first such outcome when several tie, so no exception-based fallback is
    # needed. The previous fallback selected the most common count *value*
    # instead of the largest count.
    qc_move = max(counts, key=counts.get)

    # The key holds two space-separated bit strings: pile choice, then amount.
    pile_bits, amount_bits = qc_move.split(' ')[0], qc_move.split(' ')[1]

    # The address register stores pile index + 1.
    board_choice = int(pile_bits,2) - 1

    print("Pick from:",board_choice+1)

    # The amount bits are reversed before being read as a binary number.
    amount = int(amount_bits[::-1],2)
    print("Amount:", amount)
    return board_choice,amount
|
232 |
+
|
233 |
+
|
234 |
+
|
235 |
+
|
236 |
+
|
237 |
+
|
238 |
+
|
239 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
gradio
|
3 |
+
numpy
|
4 |
+
langchain
|
5 |
+
gymnasium
|
6 |
+
qiskit
|
7 |
+
inflect
|