nlp_hero added
- nlp_hero.py +196 -0
- requirements.txt +76 -0
nlp_hero.py
ADDED
@@ -0,0 +1,196 @@
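"""nlp_hero: a Gradio demo for Turkish NLP built on the OpenAI API.

The app prompts GPT-4 (via openai.ChatCompletion) to act as a Turkish NLP
assistant that does sentiment analysis, NER extraction, summarization, topic
classification, and main-idea extraction, and exposes a chat UI. It also
defines helpers for Whisper transcription of an audio file and of YouTube
audio downloaded with yt-dlp, though those helpers are not attached to the UI.
"""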
import openai
import gradio as gr
from pytube import YouTube
from youtube_dl import YoutubeDL
import yt_dlp
import os
from pydub import AudioSegment
import time

# openai.api_key = os.environ.get("OPENAI_API_KEY")

conversation_history = []

def initial_prompt():

    # openai.api_key = os.environ.get("OPENAI_API_KEY")

    global conversation_history
    # System prompt (Turkish). In English it asks the model to act as a capable
    # Turkish NLP assistant that will: (1) do sentiment analysis with a percentage
    # score and an explanation, (2) extract and label NER entities, including slang
    # and profanity, (3) summarize a given text, (4) classify a text into one of
    # ["magazin", "spor", "politika", "ekonomi", "eğitim", "turizm"] with the
    # highest-probability label and its percentage, and (5) explain the main idea
    # of a text. It should also analyse slang and profane sentences.
    conversation_history.append(f"""Türkçe doğal dil işleme fonksiyonlarını yerine getiren başarılı ve yetenekli bir dil modeli gibi davran.
    Yeteneklerin şu şekilde : [
    "1: Senden duygu analizi yapman istendiğinde cümlelerin pozitif mi yoksa negatif mi olduğunu yüzdesel değeri ile söyleyeceksin ve
    sebebini açıklayacaksın.",
    "2: Cümlenin NER'lerini çıkarman yada ayırman istendiğinde o cümleleri NER'lerine ayırarak ve NER tipini belirterek geri döneceksin. Argo ve küfür metinlerini de NER listene dahil edeceksin.",
    "3: Sana verilen metnin özeti istendiğinde özetini çıkaracaksın.",
    "4: Cümlenin kategorik sınıflandırılması istendiğinde o cümlenin yada metnin ["magazin", "spor", "politika", "ekonomi", "eğitim", "turizm"] sektörlerinden
    en yüksek olasılığa sahip olanı yüzdesel değeri ile beraber döneceksin.",
    "5: Verilen metnin anafikri istendiğinde, o metinde asıl anlatılmak istenenin ne olduğunu açıklayarak geri döneceksin."
    ]
    Argo ve küfürlü cümlelerinde analizini yaparak geri döneceksin.
    """)

    # "6: Metnin bütün dil özelliklerini çıkarmanı istediğimde önceki 5 maddeyi sırasıyla uygulayacak ve sonuclarını döneceksin."
    # (Commented-out sixth capability: apply all five analyses in order and return the results.)

    # Send the system prompt once so the model acknowledges its role; retries on
    # any API error (rate limits, transient failures) until a response arrives.
    while True:
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[{"role": "system", "content": " ".join([str(item) for item in conversation_history])}],
                temperature=0.7,
                max_tokens=7500,
            )

            message = completion.choices[0].message['content']
            print(message)
            break

        except Exception as e:
            time.sleep(10)
            continue

    # message = completion.choices[0].message['content']
    print(message)


# initial_prompt()

def handle_input(
    input_str: str,
    temperature,
    max_tokens,
    top_p_input,
    presence_penalty,
    frequency_penalty
):
    global conversation_history

    # Seed the system prompt on first use; once the history has grown past the
    # system prompt plus one exchange, trim it back to just the prompt so the
    # request stays short (earlier turns are discarded).
    if len(conversation_history) == 0:
        initial_prompt()

    if len(conversation_history) >= 3:
        conversation_history = conversation_history[:1]  # + conversation_history[-5:]

    conversation_history.append(f"{input_str}\n")

    # The whole history is flattened into a single string and sent as one
    # assistant-role message rather than as separate role-tagged chat turns.
    content = " ".join([str(item) for item in conversation_history])

    while True:
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[{"role": "assistant", "content": content}],
                temperature=temperature,
                max_tokens=max_tokens,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
                top_p=top_p_input,
                # stream=stream_input
            )

            message = completion.choices[0].message['content']
            break

        except Exception as e:
            time.sleep(5)
            continue

    conversation_history.append(f"{message}\n")

    return message

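# The retry loops in initial_prompt() and handle_input() retry forever on any
# exception, which hangs the UI if the API key is invalid. A bounded-retry
# variant (a sketch, not part of this commit; openai.error.OpenAIError exists in
# the pinned openai==0.27.2) could look like:
#
#     for attempt in range(3):
#         try:
#             completion = openai.ChatCompletion.create(...)
#             break
#         except openai.error.OpenAIError:
#             time.sleep(5)
#     else:
#         raise RuntimeError("OpenAI request failed after 3 attempts")
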
def get_model_reply(query, api_key, temperature, max_tokens, top_p_input, presence_penalty, frequency_penalty, context=[]):
    # NOTE: the mutable default for `context` is shared across calls; in practice
    # the Gradio UI always passes its own gr.State list, so the default is unused.
    context += [query]

    openai.api_key = api_key

    response = handle_input(query, temperature, max_tokens, top_p_input, presence_penalty, frequency_penalty)

    context += [response]

    # Pair the flat [user, bot, user, bot, ...] history into (user, bot) tuples
    # for gr.Chatbot.
    responses = [(u, b) for u, b in zip(context[::2], context[1::2])]

    return responses, context

def speech_2_text(audio, api_key, context=[]):
    # Transcribe an audio file with Whisper, then answer the transcript with GPT-4.
    # (Defined here but not wired into the UI below.)
    openai.api_key = api_key
    audio_file = open(audio, "rb")
    transcript = openai.Audio.transcribe("whisper-1", audio_file)
    prompt = transcript.text
    context += [prompt]

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=7500,
    )

    response = completion.choices[0].message['content']
    context += [response]

    responses = [(u, b) for u, b in zip(context[::2], context[1::2])]

    return responses, context

def get_audio(url, api_key, context=[]):
    # Download the audio track of a YouTube video with yt-dlp, convert it to mp3
    # via the FFmpeg post-processor, and transcribe it with Whisper.
    # (Also not wired into the UI below.)
    openai.api_key = api_key
    destination = "./audio/test"
    try:
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': destination,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        audio_file = open(f'{destination}.mp3', "rb")
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

        context += [transcript.text]

        responses = [(u, b) for u, b in zip(context[::2], context[1::2])]

        return responses, context

    except Exception as e:
        # Any failure (download, FFmpeg, transcription) just prints and returns None.
        print("Connection Error")

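# speech_2_text and get_audio above are not connected to any UI element in this
# commit. A possible extra tab (an untested sketch assuming the pinned
# gradio==3.23.0 API; `audio_in` and `audio_state` are hypothetical names) could be:
#
#     with gr.Tab("Speech"):
#         audio_in = gr.Audio(source="microphone", type="filepath")
#         audio_state = gr.State([])
#         audio_in.change(speech_2_text, [audio_in, api_key, audio_state], [chatbot, audio_state])
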
with gr.Blocks(css="#chatbot {overflow-y:auto; min-height:400px;} ") as dialog_app:

    with gr.Tab("ChatBot"):
        with gr.Row():
            with gr.Column(scale=1, min_width=600):
                api_key = gr.Textbox(label="Your API Key", type="password")
                # gr.inputs.* is the legacy (deprecated) component namespace still
                # available in the pinned gradio==3.23.0.
                temperature_input = gr.inputs.Slider(minimum=0, maximum=1.0, default=0.5, step=0.01, label="Temperature")
                max_tokens_input = gr.inputs.Slider(minimum=1, maximum=7500, default=50, step=10, label="Max Tokens")
                top_p_input = gr.inputs.Slider(minimum=0.1, maximum=1.0, default=0.5, step=0.01, label="Top P")
                presence_penalty_input = gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.0, step=0.1, label="Presence Penalty")
                frequency_penalty_input = gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.0, step=0.1, label="Frequency Penalty")
                # stream_input = gr.inputs.Checkbox(label="Stream")

            with gr.Column(scale=2, min_width=600):
                chatbot = gr.Chatbot(elem_id="chatbot")
                state = gr.State([])

                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press enter"
                ).style(container=False)

        # On Enter: send the query plus the slider settings to the model, update the
        # chat display and stored context, then clear the textbox.
        txt.submit(get_model_reply, [txt, api_key, temperature_input, max_tokens_input, top_p_input, presence_penalty_input, frequency_penalty_input, state], [chatbot, state])
        txt.submit(lambda: "", None, txt)


dialog_app.launch()
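To try the app locally (assuming Python 3 and an OpenAI API key with GPT-4 and Whisper access, entered at runtime in the "Your API Key" field), installing the pinned dependencies below with pip install -r requirements.txt and then running python nlp_hero.py should start the Gradio app via dialog_app.launch().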
requirements.txt
ADDED
@@ -0,0 +1,76 @@
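# Pinned dependencies for the nlp_hero Gradio Space. Several pins look
# Windows-specific (tensorflow-intel, wincertstore, the local certifi path)
# and may need adjusting on other platforms.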
aiofiles==23.1.0
aiohttp==3.8.4
aiosignal==1.3.1
altair==4.2.2
anyio==3.6.2
async-timeout==4.0.2
attrs==22.2.0
Brotli==1.0.9
certifi @ file:///C:/b/abs_85o_6fm0se/croot/certifi_1671487778835/work/certifi
charset-normalizer==3.1.0
click==8.1.3
colorama==0.4.6
contourpy==1.0.7
cycler==0.11.0
entrypoints==0.4
fastapi==0.95.0
ffmpy==0.3.0
filelock==3.10.7
fonttools==4.39.2
frozenlist==1.3.3
fsspec==2023.3.0
gradio==3.23.0
h11==0.14.0
httpcore==0.16.3
httpx==0.23.3
huggingface-hub==0.13.3
idna==3.4
importlib-resources==5.12.0
Jinja2==3.1.2
jsonschema==4.17.3
keras-tuner==1.2.1
kiwisolver==1.4.4
linkify-it-py==2.0.0
markdown-it-py==2.2.0
MarkupSafe==2.1.2
matplotlib==3.7.1
mdit-py-plugins==0.3.3
mdurl==0.1.2
multidict==6.0.4
mutagen==1.46.0
numpy==1.24.2
openai==0.27.2
orjson==3.8.8
packaging==23.0
pandas==1.5.3
Pillow==9.4.0
pycryptodomex==3.17
pydantic==1.10.7
pydub==0.25.1
pyparsing==3.0.9
pyrsistent==0.19.3
python-dateutil==2.8.2
python-multipart==0.0.6
pytube==12.1.3
pytz==2023.2
PyYAML==6.0
requests==2.28.2
rfc3986==1.5.0
semantic-version==2.10.0
six==1.16.0
sniffio==1.3.0
starlette==0.26.1
tensorflow==2.11.0
tensorflow-intel==2.11.0
toolz==0.12.0
tqdm==4.65.0
typing_extensions==4.5.0
uc-micro-py==1.0.1
urllib3==1.26.15
uvicorn==0.21.1
websockets==10.4
wincertstore==0.2
yarl==1.8.2
youtube-dl==2021.12.17
yt-dlp==2023.3.4
zipp==3.15.0