Jackss committed on
Commit
1dd5fed
1 Parent(s): 12bb371

Added HTML

Files changed (4)
  1. Dockerfile +10 -1
  2. main.py +28 -6
  3. requirements.txt +2 -1
  4. static/index.html +10 -0
Dockerfile CHANGED
@@ -6,6 +6,15 @@ COPY ./requirements.txt /code/requirements.txt
 
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
-COPY . .
+RUN useradd -m -u 1000 user
+
+USER user
+
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+COPY --chown=user . $HOME/app
 
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py CHANGED
@@ -1,14 +1,36 @@
 from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from transformers import AutoTokenizer, AutoModel
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+
+# load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained('allenai/specter')
+model = AutoModel.from_pretrained('allenai/specter')
+
+# papers = [{'title': 'BERT', 'abstract': 'We introduce a new language representation model called BERT'},
+# {'title': 'Attention is all you need', 'abstract': ' The dominant sequence transduction models are based on complex recurrent or convolutional neural networks'}]
+
+# concatenate title and abstract
 
-model_name = 'allenai/specter'
 
 app = FastAPI()
 
-@app.get('/')
-def read_root():
-    return {"Hello": "World!"}
+app.mount("/", StaticFiles(directory="static", html=True), name="static")
+
+@app.get("/")
+def index() -> FileResponse:
+    return FileResponse(path="/app/static/index.html", media_type="text/html")
 
 @app.get('/similarity')
 def similarity(input):
-    output = pipe_flan(input)
-    return {"output": output[0]["generated_text"]}
+    papers = input['papers']
+    title_abs = [d['title'] + tokenizer.sep_token + (d.get('abstract') or '') for d in papers]
+    # preprocess the input
+    inputs = tokenizer(title_abs, padding=True, truncation=True, return_tensors="pt", max_length=512)
+    result = model(**inputs)
+    # take the first token in the batch as the embedding
+    embeddings = result.last_hidden_state[:, 0, :]
+    res = cosine_similarity(embeddings, embeddings).tolist()
+    return {"output": res}
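A note on the new /similarity handler as committed: input arrives as a GET query parameter, so FastAPI hands it in as a string and input['papers'] raises at request time; the StaticFiles mount at "/" is registered before the routes, so it can shadow them; and last_hidden_state still carries gradients, which typically makes sklearn's cosine_similarity fail on the raw tensor. A minimal corrected sketch, assuming a POST endpoint with a Pydantic body (the Paper and SimilarityRequest names are illustrative, not part of the commit):

from typing import List, Optional

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained('allenai/specter')
model = AutoModel.from_pretrained('allenai/specter')

app = FastAPI()

class Paper(BaseModel):            # illustrative schema, not in the commit
    title: str
    abstract: Optional[str] = None

class SimilarityRequest(BaseModel):
    papers: List[Paper]

@app.post('/similarity')
def similarity(request: SimilarityRequest):
    # concatenate title and abstract, as in the committed code
    title_abs = [p.title + tokenizer.sep_token + (p.abstract or '')
                 for p in request.papers]
    inputs = tokenizer(title_abs, padding=True, truncation=True,
                       return_tensors="pt", max_length=512)
    with torch.no_grad():          # inference only: skip autograd
        result = model(**inputs)
    # first token ([CLS]) of each sequence is the SPECTER embedding
    embeddings = result.last_hidden_state[:, 0, :].numpy()
    return {"output": cosine_similarity(embeddings, embeddings).tolist()}

Registering the StaticFiles mount after the routes (or at a sub-path such as /static) also keeps /similarity reachable.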
requirements.txt CHANGED
@@ -3,4 +3,5 @@ requests==2.27.*
 sentencepiece==0.1.*
 torch==1.11.*
 transformers==4.*
-uvicorn[standard]==0.17.*
+uvicorn[standard]==0.17.*
+scikit-learn==1.2.*
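The new scikit-learn pin supplies sklearn.metrics.pairwise.cosine_similarity used in main.py; the uvicorn line shows as removed and re-added unchanged, most likely because the file previously lacked a trailing newline.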
static/index.html ADDED
@@ -0,0 +1,10 @@
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Document</title>
+</head>
+<body>
+    Hello world!
+</body>
+</html>
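With this placeholder page in place, the Space serves static HTML alongside the API. A usage sketch for exercising the endpoint, assuming the POST variant sketched above and the port 7860 from the Dockerfile (the localhost URL is illustrative):

import requests

# paper data taken from the commented-out example in main.py
payload = {"papers": [
    {"title": "BERT",
     "abstract": "We introduce a new language representation model called BERT"},
    {"title": "Attention is all you need",
     "abstract": "The dominant sequence transduction models are based on "
                 "complex recurrent or convolutional neural networks"},
]}
resp = requests.post("http://localhost:7860/similarity", json=payload)
# "output" holds a symmetric matrix of pairwise cosine similarities
print(resp.json()["output"])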