Spaces:
Sleeping
Sleeping
YchKhan
commited on
Commit
•
e46c1c6
0
Parent(s):
Duplicate from OrganizedProgrammers/SEPredictor
Browse files- .gitattributes +35 -0
- README.md +13 -0
- app.py +65 -0
- ebd4appdom.xlsx +3 -0
- requirements.txt +7 -0
- templates/index.html +95 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
ebd4appdom.xlsx filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: SEPredictor
|
3 |
+
emoji: ⚡
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.29.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: OrganizedProgrammers/SEPredictor
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, jsonify, request, render_template
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sentence_transformers import SentenceTransformer, util
|
5 |
+
import torch
|
6 |
+
import re
|
7 |
+
|
8 |
+
# Flask application object for this module.
app = Flask(__name__)
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
def extract_embeddings(embeddings_str):
    """Return every numeric literal found in *embeddings_str* as a float.

    The string is scanned left to right and each number is converted with
    ``float``; all non-numeric characters (brackets, commas, whitespace,
    ...) are ignored.

    Args:
        embeddings_str: Text containing numbers, e.g. ``"[0.12, -3.4e-02]"``.

    Returns:
        A list of floats in the order they appear; empty if none are found.
    """
    # The exponent sign may be "+" as well as "-" (e.g. "1.5e+02"). Without
    # "[+-]?" such a value would be split into two numbers, 1.5 and 2.0.
    pattern = r'(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'
    matches = re.findall(pattern, embeddings_str)
    return [float(match) for match in matches]
|
16 |
+
|
17 |
+
# Load the corpus spreadsheet and the sentence-embedding model once, at
# import time, so request handlers can reuse them.
df = pd.read_excel("ebd4appdom.xlsx")
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# The "Embeddings" column is stored as text; turn each cell into a list of
# floats.
df['Embeddings'] = df['Embeddings'].apply(extract_embeddings)
descriptions_embeddings = list(df['Embeddings'])

# Parallel, row-aligned lists for the remaining columns.
patnums, standards, urls, descriptions = (
    list(df[column])
    for column in ("Number", "Standards", "URL", "Description")
)
|
25 |
+
|
26 |
+
def split_string(s, max_len, overlap, min_words_count=0):
    """Split *s* into overlapping windows of whitespace-separated words.

    Windows of ``max_len`` words are taken while more than ``max_len`` words
    remain from the current start; each new window begins ``overlap`` words
    before the end of the previous one. Whatever remains is emitted as a
    final (possibly shorter) chunk.

    Args:
        s: Text to split.
        max_len: Number of words in each full window.
        overlap: Number of words shared by consecutive windows.
        min_words_count: Chunks with this many words or fewer are dropped.

    Returns:
        A list of chunks, each a single-space-joined string of words.

    Raises:
        ValueError: If ``overlap >= max_len`` and the text is longer than one
            window. The window start could then never advance, which
            previously caused an endless loop.
    """
    words = s.split()
    step = max_len - overlap
    # Only reject the parameter combination when the loop would really be
    # entered, so every call that used to return still returns the same value.
    if step <= 0 and max_len < len(words):
        raise ValueError(
            "overlap must be smaller than max_len to split text longer "
            "than one window"
        )

    substrings = []
    start = 0
    while start + max_len < len(words):
        substrings.append(" ".join(words[start:start + max_len]))
        start += step
    # Tail chunk: everything from the last start position to the end.
    substrings.append(" ".join(words[start:]))

    return [
        chunk for chunk in substrings
        if len(chunk.split()) > min_words_count
    ]
|
41 |
+
|
42 |
+
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the search page and, on POST, the similarity results.

    GET renders ``index.html`` with ``results=None``.

    POST reads the ``query`` form field, splits it into 80-word chunks with a
    3-word overlap, encodes each chunk with ``embedder`` and ranks the corpus
    embeddings by cosine similarity. ``results`` holds one list per chunk:
    its first element is ``[chunk_text, 'sample<N>']`` and the following
    elements are dicts with the keys ``score``, ``standards``, ``desc`` and
    ``url`` for the ``top_k`` best matches (at most 5).
    """
    if request.method != 'POST':
        return render_template('index.html', results=None)

    query = request.form['query']
    user_samples = split_string(query, 80, 3)
    top_k = min(5, len(descriptions))
    results = []

    if user_samples:
        # util.cos_sim converts a non-tensor argument with torch.tensor() on
        # every call; do that conversion once per request instead of once
        # per chunk.
        corpus_embeddings = torch.tensor(descriptions_embeddings)

    for cpt, user_sample in enumerate(user_samples):
        # Header entry: chunk text plus the id used by the template.
        sp = [[user_sample, 'sample' + str(cpt)]]
        sample_embedding = embedder.encode(user_sample, convert_to_tensor=True)
        cos_scores = util.cos_sim(sample_embedding, corpus_embeddings)[0]
        top_results = torch.topk(cos_scores, top_k)
        for score, idx in zip(top_results[0], top_results[1]):
            sp.append(dict(
                score=round(float(score.item()), 4),
                standards=standards[idx],
                desc=descriptions[idx],
                url=urls[idx],
            ))
        results.append(sp)

    return render_template('index.html', results=results)
|
63 |
+
|
64 |
+
if __name__ == '__main__':
    # When run as a script, serve on all interfaces on port 7860.
    app.run(port=7860, host="0.0.0.0")
|
ebd4appdom.xlsx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c00f2818e0c2e19382d6d180b020c4e8a03b681f5b7d8afb8cf39b620b5faea6
|
3 |
+
size 332417405
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
torch>=1.6
|
5 |
+
flask
|
6 |
+
sentence-transformers
|
7 |
+
openpyxl
|
templates/index.html
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
<head>
|
4 |
+
<meta charset="utf-8">
|
5 |
+
<title>SEPredictor</title>
|
6 |
+
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-aFq/bzH65dt+w6FI2ooMVUpc+21e0SRygnTpmBvdBgSdnuTN7QbdgL+OapgHtvPp" crossorigin="anonymous">
|
7 |
+
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha2/dist/js/bootstrap.bundle.min.js" integrity="sha384-qKXV1j0HvMUeCBQ+QVp7JcfGl760yU08IQ+GpUo5hlbpg51QRiuqHAJz8+BrxE/N" crossorigin="anonymous"></script>
|
8 |
+
<style>
|
9 |
+
body {
|
10 |
+
background-color: #EEEEEE;
|
11 |
+
}
|
12 |
+
form {
|
13 |
+
padding: 5em 10em;
|
14 |
+
}
|
15 |
+
.btn-primary {
|
16 |
+
font-size: 1.2em;
|
17 |
+
padding: 0em 7em;
|
18 |
+
display: block;
|
19 |
+
margin: 0 auto;
|
20 |
+
}
|
21 |
+
.similarsamples{
|
22 |
+
padding: 2em 2em;
|
23 |
+
}
|
24 |
+
.navbar {
|
25 |
+
position: fixed;
|
26 |
+
top: 0;
|
27 |
+
width: 100%;
|
28 |
+
z-index: 1;
|
29 |
+
}
|
30 |
+
</style>
|
31 |
+
|
32 |
+
</head>
|
33 |
+
|
34 |
+
|
35 |
+
<body>
|
36 |
+
|
37 |
+
|
38 |
+
<nav class="navbar bg-body-tertiary">
|
39 |
+
<div class="container-fluid">
|
40 |
+
<a class="navbar-brand" href="/">SEPredictor</a>
|
41 |
+
</div>
|
42 |
+
</nav>
|
43 |
+
|
44 |
+
|
45 |
+
<form method="POST" action="/">
|
46 |
+
<div class="mb-3">
|
47 |
+
<label for="query">Enter your patent description:</label>
|
48 |
+
<textarea type="text" id="query" name="query" class="form-control" rows="10"></textarea>
|
49 |
+
</div>
|
50 |
+
<input type="submit" value="Search similar patents" class="btn btn-primary mb-3">
|
51 |
+
</form>
|
52 |
+
|
53 |
+
|
54 |
+
<div class="similarsamples">
|
55 |
+
{% if results %}
|
56 |
+
<h2>Description Samples:</h2>
|
57 |
+
{% for result in results %}
|
58 |
+
<div class="accordion accordion-flush" id="accordionFlushExample">
|
59 |
+
<div class="accordion-item">
|
60 |
+
<h2 class="accordion-header">
|
61 |
+
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#{{ result[0][1] }}" aria-expanded="false" aria-controls="{{ result[0][1] }}">
|
62 |
+
<p>{{ result[0][0] }}</p>
|
63 |
+
</button>
|
64 |
+
</h2>
|
65 |
+
<div id="{{ result[0][1] }}" class="accordion-collapse collapse" data-bs-parent="#accordionFlushExample">
|
66 |
+
<div class="accordion-body">
|
67 |
+
<table class="table table-light table-striped">
|
68 |
+
<thead>
|
69 |
+
<tr>
|
70 |
+
<th scope="col">Score</th>
|
71 |
+
<th scope="col">Sample</th>
|
72 |
+
<th scope="col">Standards</th>
|
73 |
+
<th scope="col">Document</th>
|
74 |
+
</tr>
|
75 |
+
</thead>
|
76 |
+
<tbody>
|
77 |
+
{% for i in range(1,6) %}
|
78 |
+
<tr>
|
79 |
+
<th scope="row">{{ result[i]['score'] }}</th>
|
80 |
+
<td>{{ result[i]['desc'] }}</td>
|
81 |
+
<td>{{ result[i]['standards'] }}</td>
|
82 |
+
<td><a href="{{ result[i]['url'] }}">Open</a></td>
|
83 |
+
</tr>
|
84 |
+
{% endfor %}
|
85 |
+
</tbody>
|
86 |
+
</table>
|
87 |
+
</div>
|
88 |
+
</div>
|
89 |
+
</div>
|
90 |
+
</div>
|
91 |
+
{% endfor %}
|
92 |
+
{% endif %}
|
93 |
+
</div>
|
94 |
+
</body>
|
95 |
+
</html>
|