File size: 3,809 Bytes
b0829c1
 
 
8c15345
b0829c1
 
 
 
 
ae88252
 
b0829c1
 
 
 
8c15345
 
b0829c1
 
 
 
 
 
 
 
 
 
 
 
 
8c15345
b0829c1
 
 
 
 
8c15345
b0829c1
 
 
 
8c15345
b0829c1
 
 
 
8c15345
b0829c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

import numpy as np
import re
import spacy
import openai
from operator import itemgetter
#user input manager class
class input_manager:
  """Translate user-supplied feature tags into an activation vector.

  The vector is a dict with one entry per column of ``key_df``; every value
  starts at 0 and the entries selected by the user are set to 1.0. Tags that
  are not columns of ``key_df`` are mapped to the most similar known tag of
  the same feature class (game type, mechanic, category, family).
  """

  # Recognised tag prefixes paired with the index of the matching candidate
  # list inside ``search_tokens``.
  _PREFIX_SLOTS = (
    ("game_type_", 0),
    ("mechanic_", 1),
    ("category_", 2),
    ("family_", 3),
  )

  #initialize key dictionary from vector data frame
  def __init__(self, key_df, slim_df, search_tokens):
    """Store the lookup data and build the all-zero key vector.

    Args:
      key_df: data frame whose column names are the known feature tags.
      slim_df: stored on the instance; not used by the methods of this class.
      search_tokens: indexable collection with four candidate lists, in the
        order game type, mechanic, category, family. Each candidate must
        provide ``similarity(token)`` and stringify to the tag suffix.
    """
    self.key_df = key_df
    self.slim_df = slim_df
    self.search_tokens = search_tokens
    self.key = dict(zip(list(key_df.columns), np.zeros(len(key_df.columns))))
    self.nlp = spacy.load("en_core_web_md")

  def _nearest_known(self, word):
    """Return the closest known tag for an unknown ``word``, or None."""
    for prefix, slot in self._PREFIX_SLOTS:
      if not word.startswith(prefix):
        continue
      # Only the text after the last underscore is compared.
      tok = self.nlp(word.split("_")[-1])
      best, score = max(
        ((cand, cand.similarity(tok)) for cand in self.search_tokens[slot]),
        key=itemgetter(1),
        default=(None, 0),  # empty candidate list -> treated as "no match"
      )
      #if no known match is found (model doesn't recognize input word), we're going to discard - other solutions performance prohibitive
      if score > 0:
        # str() is required: candidates are token objects, not plain strings.
        return prefix + str(best)
      return None
    # Word does not belong to any recognised feature class.
    return None

  #translate input text to vector
  def set_input(self, input_cats):
    """Build the activation vector for ``input_cats``.

    Args:
      input_cats: iterable of tag strings such as ``"mechanic_<name>"``.

    Returns:
      A copy of the key dict with every selected (or nearest-match) tag set
      to 1.0. The instance-level key dict is left untouched.
    """
    #separate known/unknown features
    k_flags = [cat for cat in input_cats if cat in self.key]
    unk_flags = [cat for cat in input_cats if cat not in self.key]

    #process within feature class similarity for each unknown input
    matched = [self._nearest_known(word) for word in unk_flags]

    #if unks are processed, rejoin nearest match to known (order-preserving de-duplication)
    active = dict.fromkeys(k_flags + [m for m in matched if m is not None])

    #preserve global key and output copy w/input keys activated to 1
    d = self.key.copy()
    for cat in active:
      d[cat] = 1.0

    return d

  def input_parser(self, in_vec):
    """Return the keys of ``in_vec`` whose value equals 1."""
    #extracting keys from processed vector
    return [k for k, v in in_vec.items() if v == 1]
  
class model_control:
  """Wrap prompt building, completion calls and response clean-up.

  NOTE(review): ``openai.FineTune`` and ``openai.Completion`` look like the
  pre-1.0 ``openai`` SDK interface - confirm the pinned SDK version.
  """

  # status -> (temperature, presence_penalty) used by call_api.
  _SAMPLING = {
    0: (0.5, 0.7),
    1: (0.4, 0.6),
    2: (0.5, 0.8),
  }

  # Sentences containing any of these phrases are removed from responses.
  _CREDIT_PAT = re.compile(
    "(?:" + "|".join(["[Dd]esigned by","[Dd]esigner of","[Aa]rt by","[Aa]rtist of","[Pp]ublished","[Pp]ublisher of"]) + ")"
  )
  # Splits text into sentence bodies and their (captured) terminators.
  _SENTENCE_SPLIT = re.compile(r'([.?!:])')
  # Matches a terminator that directly follows another terminator.
  _DOUBLE_PUNCT = re.compile("(?<=[.!?])[.!?]")

  def __init__(self, apikey, model_id):
    """Set the module-level OpenAI key and look up the fine-tuned model name.

    Args:
      apikey: OpenAI API key; also assigned to ``openai.api_key``.
      model_id: fine-tune job id passed to ``openai.FineTune.retrieve``.
    """
    self.api_key = apikey
    openai.api_key = self.api_key

    self.prompt = None

    self.model = openai.FineTune.retrieve(id=model_id).fine_tuned_model

  def prompt_formatter(self, ks):
    """Join the keys in ``ks`` into the prompt and store it on ``self.prompt``."""
    self.prompt = ". ".join(ks) + "\n\n###\n\n"

  def call_api(self, status=0):
    """Request a completion for ``self.prompt`` and return its text.

    Args:
      status: 0, 1 or 2; selects the temperature / presence-penalty pair.

    Returns:
      The ``text`` of the first choice in the API response.

    Raises:
      ValueError: if ``status`` is not one of the supported values.
    """
    try:
      temp, pres = self._SAMPLING[status]
    except (KeyError, TypeError):
      # Previously an unknown status left the sampling values unbound.
      raise ValueError(
        f"status must be one of {sorted(self._SAMPLING)}, got {status!r}"
      ) from None

    answer = openai.Completion.create(
      model=self.model,
      prompt=self.prompt,
      max_tokens=512,
      temperature=temp,
      stop=["END"],
      presence_penalty=pres,
      frequency_penalty=0.5
    )
    return answer['choices'][0]['text']

  def resp_cleanup(self, text):
    """Trim an unfinished trailing sentence and remove credit sentences.

    Args:
      text: raw completion text; may be empty.

    Returns:
      The text cut after its last ``.``, ``!`` or ``?`` (empty if there is
      none), without sentences that mention designer/artist/publisher
      phrases, and with doubled terminators collapsed to one.
    """
    if not text.endswith(("!", ".", "?")):
      # Drop the dangling fragment after the last sentence terminator;
      # rfind yields -1 when none exists, which empties the text.
      cut = max(text.rfind(mark) for mark in ".!?")
      text = text[:cut + 1]

    sent = self._SENTENCE_SPLIT.split(text)
    kept = ''.join(s for s in sent if self._CREDIT_PAT.search(s) is None)

    # Removing a sentence body leaves its terminator behind; collapse those.
    return self._DOUBLE_PUNCT.sub('', kept)