liujch1998 committed on
Commit
5f2c7e6
1 Parent(s): 9282a5a

Sync changes

Browse files
Files changed (2) hide show
  1. app.py +57 -10
  2. constants.py +1 -2
app.py CHANGED
@@ -31,26 +31,73 @@ def process(query_type, corpus_desc, engine_desc, query, maxnum, request: gr.Req
31
 
32
  def count(corpus_desc, engine_desc, query, request: gr.Request):
33
  result = process('count', corpus_desc, engine_desc, query, None, request)
34
- return result.get('latency', ''), result.get('tokenized', ''), result.get('count', '')
 
 
 
 
 
 
 
35
  def prob(corpus_desc, engine_desc, query, request: gr.Request):
36
  result = process('prob', corpus_desc, engine_desc, query, None, request)
37
- return result.get('latency', ''), result.get('tokenized', ''), result.get('probability', '')
 
 
 
 
 
 
 
38
  def ntd(corpus_desc, engine_desc, query, request: gr.Request):
39
  result = process('ntd', corpus_desc, engine_desc, query, None, request)
40
- return result.get('latency', ''), result.get('tokenized', ''), result.get('distribution', '')
 
 
 
 
 
 
 
41
  def infgram_prob(corpus_desc, engine_desc, query, request: gr.Request):
42
  result = process('infgram_prob', corpus_desc, engine_desc, query, None, request)
43
- return result.get('latency', ''), result.get('tokenized', ''), result.get('longest_suffix', ''), result.get('probability', '')
 
 
 
 
 
 
 
 
 
44
  def infgram_ntd(corpus_desc, engine_desc, query, request: gr.Request):
45
  result = process('infgram_ntd', corpus_desc, engine_desc, query, None, request)
46
- return result.get('latency', ''), result.get('tokenized', ''), result.get('longest_suffix', ''), result.get('distribution', '')
 
 
 
 
 
 
 
 
 
47
  def search_docs(corpus_desc, engine_desc, query, maxnum, request: gr.Request):
48
  result = process('search_docs', corpus_desc, engine_desc, query, maxnum, request)
49
- outputs = result.get('outputs', [])
50
- outputs = outputs[:maxnum]
51
- while len(outputs) < 10:
52
- outputs.append([])
53
- return result.get('latency', ''), result.get('tokenized', ''), result.get('message', ''), outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6], outputs[7], outputs[8], outputs[9]
 
 
 
 
 
 
 
 
54
  def analyze_document(corpus_desc, engine_desc, query, request: gr.Request):
55
  result = process('analyze_document', corpus_desc, engine_desc, query, None, request)
56
  return result.get('latency', ''), result.get('html', '')
 
31
 
32
def count(corpus_desc, engine_desc, query, request: gr.Request):
    '''Handle a "count" query and render its fields for the UI.

    Returns (latency, tokenized, count) as display strings; a backend
    error is surfaced in the count field itself.
    '''
    result = process('count', corpus_desc, engine_desc, query, None, request)
    latency = format(result['latency'], '.3f') if 'latency' in result else ''
    tokenized = result.get('tokenized', '')
    count = result['error'] if 'error' in result else f'{result["count"]:,}'
    return latency, tokenized, count
41
+
42
def prob(corpus_desc, engine_desc, query, request: gr.Request):
    '''Handle a "prob" query; shows the probability with its supporting counts.

    Returns (latency, tokenized, prob) as display strings; a backend
    error is surfaced in the prob field itself.
    '''
    result = process('prob', corpus_desc, engine_desc, query, None, request)
    latency = format(result['latency'], '.3f') if 'latency' in result else ''
    tokenized = result.get('tokenized', '')
    if 'error' in result:
        prob = result['error']
    else:
        prob = f'{result["prob"]:.4f} ({result["cont_cnt"]:,} / {result["prompt_cnt"]:,})'
    return latency, tokenized, prob
51
+
52
def ntd(corpus_desc, engine_desc, query, request: gr.Request):
    '''Handle an "ntd" (next-token distribution) query.

    Returns (latency, tokenized, ntd); a backend error is surfaced in
    the distribution slot itself.
    '''
    result = process('ntd', corpus_desc, engine_desc, query, None, request)
    latency = format(result['latency'], '.3f') if 'latency' in result else ''
    tokenized = result.get('tokenized', '')
    ntd = result['error'] if 'error' in result else result['ntd']
    return latency, tokenized, ntd
61
+
62
def infgram_prob(corpus_desc, engine_desc, query, request: gr.Request):
    '''Handle an "infgram_prob" query; also reports the longest matched suffix.

    Returns (latency, tokenized, longest_suffix, prob). On error the
    suffix is blank and the error text takes the prob slot.
    '''
    result = process('infgram_prob', corpus_desc, engine_desc, query, None, request)
    latency = format(result['latency'], '.3f') if 'latency' in result else ''
    tokenized = result.get('tokenized', '')
    if 'error' in result:
        longest_suffix = ''
        prob = result['error']
    else:
        longest_suffix = result['longest_suffix']
        prob = f'{result["prob"]:.4f} ({result["cont_cnt"]:,} / {result["prompt_cnt"]:,})'
    return latency, tokenized, longest_suffix, prob
73
+
74
def infgram_ntd(corpus_desc, engine_desc, query, request: gr.Request):
    '''Handle an "infgram_ntd" query; also reports the longest matched suffix.

    Returns (latency, tokenized, longest_suffix, ntd). On error the
    suffix is blank and the error text takes the distribution slot.
    '''
    result = process('infgram_ntd', corpus_desc, engine_desc, query, None, request)
    latency = format(result['latency'], '.3f') if 'latency' in result else ''
    tokenized = result.get('tokenized', '')
    if 'error' in result:
        longest_suffix = ''
        ntd = result['error']
    else:
        longest_suffix = result['longest_suffix']
        ntd = result['ntd']
    return latency, tokenized, longest_suffix, ntd
85
+
86
def search_docs(corpus_desc, engine_desc, query, maxnum, request: gr.Request):
    '''Handle a "search_docs" query.

    Always emits exactly 10 doc outputs (one per fixed UI component):
    results are truncated to `maxnum` and padded with empty slots.
    A backend error is surfaced in the message slot with 10 empty docs.
    '''
    result = process('search_docs', corpus_desc, engine_desc, query, maxnum, request)
    latency = format(result['latency'], '.3f') if 'latency' in result else ''
    tokenized = result.get('tokenized', '')
    if 'error' in result:
        message = result['error']
        docs = [[] for _ in range(10)]
    else:
        message = result['message']
        docs = result['docs']
    # Truncate to the requested count, then pad so there is always one
    # value per UI component (10 of them, hard-coded to match the layout).
    docs = docs[:maxnum]
    docs = docs + [[] for _ in range(10 - len(docs))]
    return (latency, tokenized, message, *docs[:10])
100
+
101
def analyze_document(corpus_desc, engine_desc, query, request: gr.Request):
    '''Handle an "analyze_document" query.

    Returns (latency, html) as display strings. Brought in line with the
    sibling handlers (count, prob, ntd, ...): latency is formatted to
    3 decimal places instead of being returned raw, and a backend
    'error' is surfaced in the HTML slot instead of silently yielding
    an empty page.
    '''
    result = process('analyze_document', corpus_desc, engine_desc, query, None, request)
    latency = '' if 'latency' not in result else f'{result["latency"]:.3f}'
    if 'error' in result:
        html = result['error']
    else:
        html = result.get('html', '')
    return latency, html
constants.py CHANGED
@@ -18,7 +18,7 @@ ENGINES = list(ENGINE_BY_DESC.values())
18
  MAX_QUERY_CHARS = int(os.environ.get('MAX_QUERY_CHARS', 1000))
19
  MAX_INPUT_DOC_TOKENS = int(os.environ.get('MAX_INPUT_DOC_TOKENS', 1000))
20
  MAX_OUTPUT_DOC_TOKENS = int(os.environ.get('MAX_OUTPUT_DOC_TOKENS', 5000))
21
- MAX_OUTPUT_NUM_DOCS = int(os.environ.get('MAX_OUTPUT_NUM_DOCS', 10))
22
  MAX_CNT_FOR_NTD = int(os.environ.get('MAX_CNT_FOR_NTD', 1000))
23
  MAX_CLAUSE_FREQ = int(os.environ.get('MAX_CLAUSE_FREQ', 10000))
24
  MAX_CLAUSE_FREQ_FAST = int(os.environ.get('MAX_CLAUSE_FREQ_FAST', 1000000))
@@ -34,7 +34,6 @@ DEFAULT_CONCURRENCY_LIMIT = os.environ.get('DEFAULT_CONCURRENCY_LIMIT', 10)
34
  MAX_SIZE = os.environ.get('MAX_SIZE', 100)
35
  MAX_THREADS = os.environ.get('MAX_THREADS', 40)
36
  DEBUG = (os.environ.get('DEBUG', 'False') != 'False')
37
- MIN_QUERY_INTERVAL_SECONDS = int(os.environ.get('MIN_QUERY_INTERVAL_SECONDS', 5))
38
 
39
  # C++ engine
40
  CPP_PORT = int(os.environ.get('CPP_PORT', 3786))
 
18
  MAX_QUERY_CHARS = int(os.environ.get('MAX_QUERY_CHARS', 1000))
19
  MAX_INPUT_DOC_TOKENS = int(os.environ.get('MAX_INPUT_DOC_TOKENS', 1000))
20
  MAX_OUTPUT_DOC_TOKENS = int(os.environ.get('MAX_OUTPUT_DOC_TOKENS', 5000))
21
+ MAX_OUTPUT_NUM_DOCS = int(os.environ.get('MAX_OUTPUT_NUM_DOCS', 10)) # This number is also hard-coded in app.py
22
  MAX_CNT_FOR_NTD = int(os.environ.get('MAX_CNT_FOR_NTD', 1000))
23
  MAX_CLAUSE_FREQ = int(os.environ.get('MAX_CLAUSE_FREQ', 10000))
24
  MAX_CLAUSE_FREQ_FAST = int(os.environ.get('MAX_CLAUSE_FREQ_FAST', 1000000))
 
34
  MAX_SIZE = os.environ.get('MAX_SIZE', 100)
35
  MAX_THREADS = os.environ.get('MAX_THREADS', 40)
36
  DEBUG = (os.environ.get('DEBUG', 'False') != 'False')
 
37
 
38
  # C++ engine
39
  CPP_PORT = int(os.environ.get('CPP_PORT', 3786))