joaomorossini commited on
Commit
f11dfb5
1 Parent(s): 0822b20

First commit for new HF Space. Adapted from https://huggingface.co/spaces/tseronni/startup_genome

Browse files
Files changed (9) hide show
  1. .gitignore +8 -0
  2. README.md +5 -5
  3. app.py +178 -0
  4. customization.py +46 -0
  5. description_subsector.json +98 -0
  6. examples.py +7 -0
  7. llm.py +116 -0
  8. requirements.txt +4 -0
  9. subsectors.csv +18 -0
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ /data
2
+ .env
3
+ .DS_Store
4
+ .idea/
5
+ __pycache__/
6
+ logs.csv
7
+ reference.py
8
+ /venv
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: AI Patent Classification
3
- emoji: 🌍
4
- colorFrom: red
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 4.20.1
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: Startup Genome
3
+ emoji: 📚
4
+ colorFrom: gray
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 4.17.0
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ from pandas import DataFrame as PandasDataFrame
8
+
9
+ from llm import MessageChatCompletion
10
+ from customization import css, js
11
+ from examples import example_1, example_2, example_3, example_4
12
+
13
+ load_dotenv()
14
+
15
+ API_KEY = os.getenv("API_KEY")
16
+
17
+
18
+ df = pd.read_csv('subsectors.csv')
19
+ logs_columns = ['Abstract', 'Model', 'Results']
20
+
21
+
22
def update_logs(new_log_entry=None):
    """Append ``new_log_entry`` to ``logs.csv`` and return the resulting table.

    Args:
        new_log_entry: DataFrame holding the rows to append (the caller in
            this file builds it with the ``logs_columns`` columns). ``None``
            appends nothing and simply rewrites/returns the current log.

    Returns:
        The complete log as a DataFrame restricted to ``logs_columns``.
    """
    try:
        logs_df = pd.read_csv('logs.csv')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run (or an empty file): start from an empty log instead of
        # failing the whole classification request.
        logs_df = pd.DataFrame(columns=logs_columns)

    # Skip missing/empty frames so concat never has to guess dtypes from them.
    frames = [
        frame for frame in (logs_df, new_log_entry)
        if frame is not None and not frame.empty
    ]
    if frames:
        logs_df = pd.concat(frames, ignore_index=True)

    # Keep only the log columns; this also drops the "Unnamed: 0" column that
    # files written with the DataFrame index carry when read back.
    logs_df = logs_df.reindex(columns=logs_columns)
    logs_df.to_csv('logs.csv', index=False)
    return logs_df
27
+
28
+
29
def build_context(row):
    """Render one subsector record as a single line of prompt context.

    Args:
        row: Mapping-like record indexed by 'Subsector', 'Definition',
            'Keywords', 'Does include' and 'Does not include'.

    Returns:
        A newline-terminated string in which every field is prefixed with the
        subsector name.
    """
    name = row['Subsector']
    fragments = [
        f"Subsector name: {name}. ",
        f"{name} Definition: {row['Definition']}. ",
        f"{name} keywords: {row['Keywords']}. ",
        f"{name} Does include: {row['Does include']}. ",
        f"{name} Does not include: {row['Does not include']}.\n",
    ]
    return "".join(fragments)
38
+
39
+
40
def _parse_match_scores(text):
    """Extract the first ``{...}`` literal in ``text`` as a label -> float dict.

    Returns an empty dict when no braces are found, when the literal cannot be
    parsed, or when it is not a dict. Entries whose value cannot be converted
    to ``float`` are dropped.
    """
    import ast  # local import: keeps this block independent of the file header

    match = re.search(r'\{.*?\}', text, re.DOTALL)
    if match is None:
        return {}

    try:
        # literal_eval only accepts Python literals, so text produced by the
        # model (and steered by the user's abstract) can never execute code.
        parsed = ast.literal_eval(match.group(0))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}

    if not isinstance(parsed, dict):
        return {}

    scores = {}
    for label, value in parsed.items():
        try:
            scores[str(label)] = float(value)
        except (TypeError, ValueError):
            continue
    return scores


def click_button(model, api_key, abstract):
    """Classify a patent abstract into subsectors with the selected chat model.

    Args:
        model: Name of the chat model to query.
        api_key: API key forwarded to ``MessageChatCompletion``.
        abstract: Patent abstract text to classify.

    Returns:
        A ``(scores, reasoning)`` tuple. ``scores`` maps labels to float match
        scores and is empty when the request failed or no dictionary could be
        parsed from the reply. ``reasoning`` is the last message held by the
        model wrapper (the reply, or the error text on failure).
    """
    labels = df['Subsector'].tolist()
    contexts = [build_context(row) for _, row in df.iterrows()]
    language_model = MessageChatCompletion(model=model, api_key=api_key)
    system_message = (f"""
You are a system designed to classify patent abstracts into one or more subsectors based on their content.
Each subsector is defined by a unique set of characteristics:
Name: The name of the subsector.
Definition: A brief description of the subsector.
Keywords: Important words associated with the subsector.
Does include: Elements typically found within the subsector.
Does not include: Elements typically not found within the subsector.
Consider 'nan' values as 'not available' or 'not applicable'.
When classifying an abstract, provide the following:
## 1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to.
## 2. Reasoning:
### Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria.
### Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract.
### Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract.
### If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector.
## 3. Non-selected Subsectors:
- If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion.
## 4. Other Subsectors: You MUST ALWAYS SUGGEST NEW SUBSECTOR LABELS, different from the ones provided by the user. They can be new subsectors or subsets the given subsectors. REMEMBER: This is mandatory
## 5. Match Score: Inside a markdown code block, provide a PYTHON DICTIONARY containing the match scores for all existing subsector labels and for any new labels suggested in item 4. Each probability should be formatted to show two decimal places.
<context>
{contexts}
</context>
""")

    user_message = f"""
Classify this patent abstract into one or more labels, then format your response as markdown:

<labels>
{labels}
</labels>

<abstract>
{abstract}
</abstract>
"""

    language_model.new_system_message(content=system_message)
    language_model.new_user_message(content=user_message)
    language_model.send_message()

    response_reasoning = language_model.get_last_message()

    # On failure the last message is an error text, not a model reply, so
    # there is nothing to parse.
    if language_model.error:
        probabilities_dict = {}
    else:
        probabilities_dict = _parse_match_scores(response_reasoning)

    # Save classification results to logs.csv
    new_log_entry = pd.DataFrame({
        'Abstract': [abstract],
        'Model': [model],
        'Results': [str(probabilities_dict)],
    })
    update_logs(new_log_entry=new_log_entry)

    return probabilities_dict, response_reasoning
101
+
102
def on_select(evt: gr.SelectData):  # SelectData is a subclass of EventData
    """Return the detail-panel values for the row selected in the table.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the positional row
            of ``df``.

    Returns:
        A 5-tuple: an Accordion relabelled with the subsector name, followed
        by the definition, keywords, "does include" and "does not include"
        texts. Cells that are empty in the CSV are returned as "" rather than
        as a NaN float.
    """
    selected = df.iloc[evt.index[0]]

    def _text(column):
        value = selected[column]
        return "" if pd.isna(value) else value

    name_accordion = gr.Accordion(label=selected['Subsector'])
    return (
        name_accordion,
        _text('Definition'),
        _text('Keywords'),
        _text('Does include'),
        _text('Does not include'),
    )
107
+
108
+
109
+ # with gr.Blocks(theme=theme) as startup_genome_demo:
110
# Gradio UI: three tabs (classification, subsector definitions, logs) plus the
# event wiring for the classify button and the subsector table.
with gr.Blocks(css=css, js=js) as demo:
    state_lotto = gr.State()
    selected_x_labels = gr.State()
    with gr.Tab("Patent Discovery"):
        with gr.Row():
            with gr.Column(scale=5):
                dropdown_model = gr.Dropdown(
                    label="Model",
                    choices=["gpt-4", "gpt-4-turbo-preview", "gpt-3.5-turbo", "gpt-3.5-turbo-0125"],
                    value="gpt-3.5-turbo-0125",
                    multiselect=False,
                    interactive=True
                )
            with gr.Column(scale=5):
                api_key = gr.Textbox(
                    label="API KEY",
                    interactive=True,
                    lines=1,
                    max_lines=1,
                    type="password",
                    value=API_KEY
                )
        with gr.Row(equal_height=True):
            abstract_description = gr.Textbox(
                label="Abstract description",
                lines=5,
                max_lines=10000,
                interactive=True,
                placeholder="Input a patent abstract"
            )
        with gr.Row():
            with gr.Accordion(label="Example Abstracts", open=False):
                gr.Examples(
                    examples=[example_1, example_2, example_3, example_4],
                    inputs=abstract_description,
                    fn=click_button,
                    label="",
                    # cache_examples=True,
                )
        with gr.Row():
            btn_get_result = gr.Button("Show classification")
        with gr.Row(elem_classes=['all_results']):
            with gr.Column(scale=4):
                label_result = gr.Label(num_top_classes=None)
            with gr.Column(scale=6):
                reasoning = gr.Markdown(label="Reasoning", elem_classes=['reasoning_results'])
    with gr.Tab("Subsector definitions"):
        with gr.Row():
            with gr.Column(scale=4):
                df_subsectors = gr.DataFrame(df[['Subsector']], interactive=False, height=800)
            with gr.Column(scale=6):
                # The default label now names the subsector whose definition
                # and keywords are shown below (they describe AR/VR).
                with gr.Accordion(label='Augmented Reality; Virtual Reality') as subsector_name:
                    s1_definition = gr.Textbox(label="Definition", lines=5, max_lines=100, value="Virtual reality (VR) is an artificial, computer-generated simulation or recreation of a real life environment or situation. Augmented reality (AR) is a technology that layers computer-generated enhancements atop an existing reality in order to make it more meaningful through the ability to interact with it. ")
                    s1_keywords = gr.Textbox(label="Keywords", lines=5, max_lines=100,
                                             value="Mixed Reality, 360 video, frame rate, metaverse, virtual world, cross reality, Artificial intelligence, computer vision")
                    does_include = gr.Textbox(label="Does include", lines=4)
                    does_not_include = gr.Textbox(label="Does not include", lines=3)
    with gr.Tab("Logs"):
        def load_logs():
            """Return the stored classification logs, or an empty table if none exist yet."""
            try:
                logs = pd.read_csv('logs.csv')
            except (FileNotFoundError, pd.errors.EmptyDataError):
                # logs.csv is created by the first classification; a missing
                # file must not prevent the app from starting.
                logs = pd.DataFrame(columns=logs_columns)
            return logs.reindex(columns=logs_columns)

        # Passing the function (not its result) lets Gradio re-read the log
        # every time the page loads instead of freezing it at start-up.
        gr.Dataframe(load_logs, height=1000)

    btn_get_result.click(fn=click_button, inputs=[dropdown_model, api_key, abstract_description], outputs=[label_result, reasoning])
    df_subsectors.select(fn=on_select, outputs=[subsector_name, s1_definition, s1_keywords, does_include, does_not_include])

if __name__ == "__main__":
    # demo.queue()
    demo.launch()
customization.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # js = """
2
+ # function createTitle() {
3
+ # var container = document.createElement('div');
4
+ # container.id = 'app-title';
5
+ # container.style.fontSize = '2em';
6
+ # container.style.fontWeight = 'bold';
7
+ # container.style.textAlign = 'center';
8
+ # container.style.marginBottom = '20px';
9
+ #
10
+ # var text = 'Automated Patent Classification with NLP';
11
+ #
12
+ # var gradioContainer = document.querySelector('.gradio-container');
13
+ # gradioContainer.insertBefore(container, gradioContainer.firstChild);
14
+ #
15
+ # return 'Title created';
16
+ # }
17
+ # """
18
+ js = """
19
+ function createGradioStaticText() {
20
+ var container = document.createElement('div');
21
+ container.id = 'gradio-animation'; // Keep the ID for consistent styling
22
+ container.style.fontSize = '2em';
23
+ container.style.textAlign = 'center';
24
+ container.style.marginBottom = '10px';
25
+ container.style.fontFamily = 'Roboto, sans-serif';
26
+
27
+ var text = 'Zero-Shot Patent Classifier';
28
+ container.innerText = text; // Set the text content directly
29
+
30
+ var gradioContainer = document.querySelector('.gradio-container');
31
+ gradioContainer.insertBefore(container, gradioContainer.firstChild);
32
+ }
33
+ """
34
+
35
+
36
+ css = """
37
+ .all_results {
38
+ height: 90vh;
39
+ }
40
+
41
+ .reasoning_results {
42
+ overflow: scroll;
43
+ height: 85vh;
44
+ }
45
+
46
+ """
description_subsector.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Artificial Intelligence; Big Data and Analytics": {
3
+ "Definition": "Artificial Intelligence companies offer products and services modeled on computer systems that simulate human cognition. These systems can sense their environment, learn, think, and respond to stimuli in a way similar to humans. Big Data and Analytics as a sub-sector refers to companies that provide solutions with the core function of identifying patterns and trends from large volumes / sets of data that cannot be processed by traditional database and analysis software.",
4
+ "Keywords": "Automated intelligence, assisted intelligence, augmented Intelligence, autonomous intelligence, supervised learning, unsupervised learning, adaptive systems, computer vision, natural language processing, natural language generation, text analytics, speech recognition, semantics technology, decision management, virtual agents, robotic process automation, machine learning, autonomous vehicle, smart robotics, autonomous vehicles, facial and image recognition,Advanced algorithms, advanced analytics, advanced visualization, data mining, user behavior analytics, data storytelling, business intelligence",
5
+ "old_name": "Artificial Intelligence, Big Data and Analytics"
6
+ },
7
+ "Advanced Manufacturing and Robotics": {
8
+ "Definition": "In comparison to Traditional Manufacturing, Advanced Manufacturing involves the use of technology to improve products and/or processes, with the relevant technology being described as \"advanced,\" \"innovative,\" or \"cutting edge.\"",
9
+ "Does include": "Autonomous driving, Industrial robots, Industrial Drones, 3D printing, Industrial IOT, additive manufacturing, advanced manufacturing, industry 4.0, nano-materials, industrial internet of things, cyber physical systems, cyber manufacturing",
10
+ "Does not include": "Traditional manufacturing machines",
11
+ "Keywords": " 3d printing, industrial IoT, internet of things, additive manufacturing, integrated circuit, manufacturing company, machine learning, industrial automation, robotics, predictive maintenance, semiconductor",
12
+ "old_name": "Advanced Manufacturing and Robotics"
13
+ },
14
+ "Clean Technology": {
15
+ "Definition": "Cleantech or clean technology is an umbrella term which is used to define technologies which optimize the use of natural resources, produce energy from renewable sources, increase efficiency and productivity, generate less waste and cause less environmental pollution. Cleantech consists of sustainable solutions in the fields of energy, water, transportation, agriculture and manufacturing, including advanced material, smart grids, water treatment, efficient energy storage and distributed energy systems.",
16
+ "Keywords": "Clean energy, and other forms of environmental, and sustainable or green, products and services, advanced material, smart grids, water treatment, efficient energy storage, distributed energy system. sustainable energy, solar panels",
17
+ "Does not include": "oil and gas, petrochemicals",
18
+ "old_name": "Cleantech"
19
+ },
20
+ "Financial Technology": {
21
+ "Definition": "Describes a business that aims at providing financial services by making use of software and modern technology. [7]Financial technology (Fintech) companies use internet, blockchain and software technologies, as well as algorithms, to offer or facilitate financial services traditionally offered by banks. These services include loans, payments, investments and wealth management. Fintech also includes software that automates financial processes or addresses financial firms\u2019 core business needs",
22
+ "Keywords": "Insurance Tech, Risk Management, Trading, Portfolios, Personal Finance, Banking, Accounting, Digital Currency, Cryptocurrency, data-driven finance, phone banking, robo-advisory, regtech.",
23
+ "Does not include": "Brick & Mortar banks, Old brick and mortar Insurance companies",
24
+ "old_name": "Fintech"
25
+ },
26
+ "Blockchain": {
27
+ "Definition": "Companies that develop applications using blockchains, or distributed databases/public ledgers that record transactions and other digital events across a peer-to-peer network. The integrity of each record on a blockchain is cryptographically verified by network participants to create a distributed consensus. Cryptocurrencies and tokens are popular applications of blockchain technology.",
28
+ "Keywords": "Distributed ledgers, Digital Mining,Cryptocurrency, Crypto tokens, Altcoins, Bitcoin, Ethereum, Monero, Crypto Exchanges, Smart Contracts, Initial Coin Offerings, decentralized finance (DeFi) applications, non-fungible tokens (NFTs)",
29
+ "old_name": "Blockchain"
30
+ },
31
+ "Cybersecurity": {
32
+ "Definition": "Cybersecurity is the body of technologies, processes, and practices designed to protect networks, computers, programs, and data from attack, damage, or unauthorized access. For our purposes it includes application security, information security, network security, disaster recovery / business continuity planning, operational security, and end-user education.",
33
+ "Does include": "Application security, Information security, Network security, Disaster recovery / business continuity planning, Operational security and End-user education.",
34
+ "Keywords": "cyber security, network security, data security, cloud security, threat detection, mobile security, cyber threat detection",
35
+ "old_name": "Cybersecurity"
36
+ },
37
+ "Agriculture Technology": {
38
+ "Definition": "Technologies to help the agriculture industry to feed our growing population without destroying the planet. AgTech is the use of technology in agriculture, horticulture, and aquaculture with the aim of improving yield, efficiency, and profitability through information monitoring and analysis of weather, pests, soil and air temperature. AgTech also includes the use of automation, such as controlling heaters and irrigation and employing pest control through aerosol pheromone dispersal.AgTech can be products, services or applications derived from agriculture that improve various input/output processes. ",
39
+ "Does include": "Precision agriculture - PA is an approach to farm management that uses information technology to ensure that the crops and soil receive exactly what they need for optimum health and productivity. Integrated genetics, Physical inputs, Information technology, Smart machinery, Spectral imaging for remote sensing for monitoring the development and health of crops, data-enabled agriculture, food technology, agricultural robots, new foods, efficient sowing",
40
+ "Does not include": "Farms, Vineyards, Coffee roasters, Beverages",
41
+ "old_name": "Agtech"
42
+ },
43
+ "New Food": {
44
+ "Definition": "New Food includes technologies that can be leveraged to create efficiency and sustainability in designing, producing, choosing, delivering, and consuming food. Solutions and processes that leverage science and technology to create new types of foods and beverages. This category includes alternative proteins such as plant-based meat, fermentation and cellular agriculture, insects-based products, functional food and drinks as well as meal replacements.",
45
+ "Does not include": "alt- proteins based skincare products, cannabis related skincare of wellness products (by wellness we do not consider products which are not consumed as food or in meal )",
46
+ "Keywords": "artificial meat, Alternative protein, Plant-based, vegan food, vegan alternative, vegan product, animal protein, clean meat, plant-based meat, cell-based meat, cellular agriculture technology, meat alternative, meat substitute, insect protein, insect-based, probiotic product, fermentation, cannabis-derived products, cannabis-based, CBD-infused, adaptogenic, nootropic, dairy alternative, mushroom food products, mushroom-based",
47
+ "old_name": "New Food"
48
+ },
49
+ "Advertising Technology": {
50
+ "Definition": "Advertising technology - different types of analytics and digital tools used in the context of advertising and marketing. Extensive and complex systems used to direct advertising to individuals and specific target audiences. It is the use of tools and software advertisers used to reach audiences, deliver and measure digital advertising campaigns.",
51
+ "Keywords": "Conversion/optimization, Email marketing, Mobile marketing, Online & display advertising, Online surveys, tests and panels, Paid search / PPC / AdWords, Search Engine Optimization, Social media & viral marketing, Usability & web design, Web analytics & tracking, Affiliate marketing, B2B digital marketing, ad networks",
52
+ "Does not include": "Companies whose products and services are not focused on innovation in the advertising and marketing (e.g. all the companies that have advertising or marketing Keywords but actually only have ads on their websites etc.), martech (marketing technology) which focuses on reaching audiences via unpaid or owned channels using a brand\u2019s first-party insights",
53
+ "old_name": "Adtech"
54
+ },
55
+ "Blue Economy": {
56
+ "Definition": "Blue economy is the \"sustainable use of ocean resources for economic growth, improved livelihoods, and jobs while preserving the health of the ocean ecosystem. A concept that seeks to promote economic growth, social inclusion and the preservation of livelihoods while at the same time ensuring environmental sustainability of the oceans and coastal areas.",
57
+ "Keywords": "ocean sustainability, aquaculture, seafloor mapping, underwater mapping, Marine Transport, Ports, and Shipbuilding, Commercial Fisheries, Coastal and Marine Tourism, maritime logistics, maritime, inwater imaging, sea/ocean waste management",
58
+ "old_name": "Blue Economy"
59
+ },
60
+ "Digital Media": {
61
+ "Definition": "Digital media is any media that is encoded in a machine-readable format. Digital media can be created, viewed, distributed, modified and preserved on digital electronics devices. Digitized content (text, graphics, audio, and video) that can be transmitted over internet or computer networks.",
62
+ "Does include": "Content, Publishing, Blogs, Social Media, Messengers, News, Gaming, Video, Music, Digital Images, Virtual Reality, Augmented Reality, Digital Media Solutions",
63
+ "Keywords": "digitized information, marketing platform, lead generation, social media marketing, digital advertising, marketing campaign, customer engagement",
64
+ "old_name": "Digital Media"
65
+ },
66
+ "Gaming": {
67
+ "Definition": "Development, marketing and monetization of video games and gambling machines / services",
68
+ "Does include": "PC Gaming, Console Gaming, Mobile Gaming, Smart Gambling Machines, Internet Gambling Services",
69
+ "Does not include": "",
70
+ "Keywords": "gaming platform, video games, online games, gaming studio, social games",
71
+ "old_name": "Gaming"
72
+ },
73
+ "Augmented Reality; Virtual Reality": {
74
+ "Definition": "Virtual reality (VR) is an artificial, computer-generated simulation or recreation of a real life environment or situation. Augmented reality (AR) is a technology that layers computer-generated enhancements atop an existing reality in order to make it more meaningful through the ability to interact with it. ",
75
+ "Keywords": "Mixed Reality, 360 video, frame rate, metaverse, virtual world, cross reality, Artificial intelligence, computer vision",
76
+ "old_name": "AR / VR"
77
+ },
78
+ "Educational Technology": {
79
+ "Definition": "Education Technology (also known as EdTech) refers to an area of technology devoted to the development and application of tools (including software, hardware, and processes) intended to promote education. Put another way, \u201cEdTech is a study and ethical practice for facilitating learning and improving performance by creating, using and managing appropriate technological processes and resources.",
80
+ "Keywords": "online education, educational platform, e-learning, online learning, personalized learning, educational software, learning app, digital education",
81
+ "old_name": "Edtech"
82
+ },
83
+ "Industry 4.0": {
84
+ "Definition": "Industry 4.0 is revolutionizing the way companies manufacture, improve and distribute their products. Manufacturers are integrating new technologies, including Internet of Things (IoT), cloud computing and analytics, and AI and machine learning into their production facilities and throughout their operations.These smart factories are equipped with advanced sensors, embedded software and robotics that collect and analyze data and allow for better decision making. Even higher value is created when data from production operations is combined with operational data from ERP, supply chain, customer service and other enterprise systems to create whole new levels of visibility and insight from previously siloed information. These digital technologies lead to increased automation, predictive maintenance, self-optimization of process improvements and, above all, a new level of efficiency and responsiveness to customers not previously possible.",
85
+ "Keywords": "Internet of Things (IoT), Cloud computing, AI and Machine Learning (ML), Edge computing, Cybersecurity, Supply chain, cloud computing and Big Data, Predictive maintenance, Robotics, Sensors, Autonomous Vehicles",
86
+ "old_name": "Industry 4.0"
87
+ },
88
+ "Biopharmaceutical; Biotechonology": {
89
+ "Definition": "Biopharma is the sub-sector that includes any prescription or non-prescription spending on drugs to treat a disease or a health condition and is regulated by the health authorities. This does not include dietary supplements or any health foods, functional foods, or nutriceuticals. It also includes over-the-counter (OTC) drugs that do not require prescriptions. [6] Biotechnology is the use of living systems and organisms to develop or make products, or \"any technological application that uses biological systems, living organisms, or derivatives thereof, to make or modify products or processes for specific use\" (UN Convention on Biological Diversity, Art. 2). Depending on the tools and applications, it often overlaps with the (related) fields of bioengineering, biomedical engineering, biomanufacturing, molecular engineering, etc.",
90
+ "Does include": "Biotechnology health care products and vaccines, agricultural biotechnology, biorefineries",
91
+ "Keywords": "DNA, Cloning, Genetically designed, Bioengineering, DNA sequencing, biomolecule labeling.",
92
+ "old_name": "Biopharma / Biotech"
93
+ },
94
+ "Medical Technology; Medical devices": {
95
+ "Definition": "The Medtech sub-sector is primarily focused on designing and manufacturing medical technological equipment, devices, and tools. The definition should exclude distributors and service providers, such as contract research organizations or contract manufacturing organizations\t. [3] Performing functions like diagnostics,therapeutic devices, treatment and drug delivery.",
96
+ "old_name": "Medtech / Medical devices"
97
+ }
98
+ }
examples.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ example_1 = "A computer vision system includes a camera that captures a plurality of image frames in a target field. A user interface is coupled to the camera. The user interface is configured to perform accelerated parallel computations in real-time on the plurality of image frames acquired by the camera. The system provides information relative to utilization and occupancy heatmaps"
2
+
3
+ example_2 = "An artificial intelligence apparatus is provided. The artificial intelligence apparatus includes: a communication interface configured to receive status information from an IoT device; and a processor configured to detect whether the IoT device is stolen based on the received state information, acquire position information of the IoT device when theft of the IoT device is detected, control the communication interface to transmit the acquired position information to a mobile terminal, receive a theft process execution command request of the IoT device from the mobile terminal, and transmit a theft process execution command corresponding to the received theft process execution request to the IoT device"
4
+
5
+ example_3 = "Access to microscope images while a sample is scanned or images are generated or uploaded can decrease an amount of time that a user waits to view a region of interest of an image. A processor can be configured to allow a remote user to access a portion of an image at full or partial resolution while other portions of the sample are being scanned, or while one or more images are generated or uploaded at full or partial resolution. A processor stored locally with the microscope can be configured to allow a remote user to access a completed scan over the internet prior to the scan being fully uploaded over a network such as the internet to a remote server. In some embodiments, a processor may be coupled to a microscope, a user device, or a remote server"
6
+
7
+ example_4 = "A biodegradable container made from plant-based plastic that holds, in a preferred embodiment, one to two ounces of a gel-like or otherwise viscous liquid for immediate nutrition. Once opened and used, the container can be safely disposed of on the ground where it will be broken down by natural factors like rain over several days"
llm.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+
3
+
4
class MessageChatCompletion:
    """Stateful helper that accumulates chat messages and sends them to OpenAI.

    Messages are collected in ``self.prompt['messages']``. After
    ``send_message`` the conversation ends with either the model's reply or an
    error text, which ``get_last_message`` returns; ``self.error`` tells the
    two apart.
    """

    def __init__(self,
                 model: str = 'gpt-3.5-turbo-0125',
                 message: str = '',
                 api_key: str = '',
                 temperature: float = 0.10,
                 top_p: float = 0.95,
                 n: int = 1,
                 stream: bool = False,
                 stop: str = "\n",
                 max_tokens: int = 4096,
                 presence_penalty: float = 0.0,
                 frequency_penalty: float = 0.0,
                 logit_bias: dict = None,
                 user: str = ''):
        """Store the request settings and optionally queue a first user message.

        Args:
            model: Chat model name.
            message: If non-empty, appended as the first user message.
            api_key: OpenAI API key; also assigned to ``openai.api_key``.
            temperature, top_p, presence_penalty, frequency_penalty, stream:
                Sampling/request options forwarded by ``send_message``.
            n, stop, logit_bias, user: Stored in ``self.prompt`` only;
                ``send_message`` does not forward them.
            max_tokens: Completion token limit; ``None`` omits the limit.
        """
        self.api_key = api_key
        # NOTE(review): this sets the key on the openai module itself, so it
        # is shared by every instance in the process — confirm that is
        # acceptable when several users supply different keys.
        openai.api_key = self.api_key

        if model in ["gpt-4", "gpt-4-turbo-preview", "gpt-3.5-turbo", "gpt-3.5-turbo-0125"]:
            self.endpoint = "https://api.openai.com/v1/chat/completions"
        else:
            self.endpoint = "https://api.openai.com/v1/completions"

        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

        self.prompt = {
            "model": model,
            "messages": [],
            "temperature": temperature,
            "top_p": top_p,
            "n": n,
            "stream": stream,
            "stop": stop,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty
        }

        if max_tokens is not None:
            self.prompt["max_tokens"] = max_tokens

        if logit_bias is not None:
            self.prompt["logit_bias"] = logit_bias

        if user != '':
            self.prompt["user"] = user

        if message != '':
            self.new_user_message(content=message)

        self.response = ''

        self.error = False

    def new_message(self, role: str = 'user', content: str = '', name: str = ''):
        """Append a message with the given role; ``content`` is stringified."""
        new_message = {"role": role, "content": f"{content}"}
        if name != '':
            new_message['name'] = name

        self.prompt['messages'].append(new_message)

    def new_user_message(self, content: str = '', name: str = ''):
        """Append a message with role ``user``."""
        self.new_message(role='user', content=content, name=name)

    def new_system_message(self, content: str = '', name: str = ''):
        """Append a message with role ``system``."""
        self.new_message(role='system', content=content, name=name)

    def new_assistant_message(self, content: str = '', name: str = ''):
        """Append a message with role ``assistant``."""
        self.new_message(role='assistant', content=content, name=name)

    def get_last_message(self):
        """Return the content of the most recently appended message."""
        return self.prompt['messages'][-1]['content']

    def send_message(self):
        """Send the accumulated messages and record the reply.

        On success the reply is stored in ``self.response``, appended to the
        conversation as an ``assistant`` message, and returned. On any failure
        ``self.error`` is set, an explanatory ``system`` message is appended
        (so ``get_last_message`` yields the error text), and the exception
        object is returned rather than raised.

        NOTE(review): the reply is read from ``response.choices[0]``, so a
        streamed response is presumably not supported here — confirm before
        enabling ``stream=True``.
        """
        self.error = False

        try:
            request = {
                'model': self.prompt['model'],
                'messages': self.prompt['messages'],
                'frequency_penalty': self.prompt['frequency_penalty'],
                'temperature': self.prompt['temperature'],
                'top_p': self.prompt['top_p'],
                'presence_penalty': self.prompt['presence_penalty'],
                'stream': self.prompt['stream'],
            }
            # max_tokens is only stored when the caller supplied a limit.
            if 'max_tokens' in self.prompt:
                request['max_tokens'] = self.prompt['max_tokens']

            response = openai.chat.completions.create(**request)
            full_response = response.choices[0].message.content

        except Exception as e:
            # Deliberate catch-all: callers read the failure from self.error
            # and the appended message instead of handling exceptions.
            self.error = True

            if self.api_key == '' or self.api_key is None:
                self.new_system_message(content="API key is missing")
            else:
                self.new_system_message(content=f"Unable to generate ChatCompletion response\nException: {e}")
            return e

        self.response = full_response
        # The reply belongs to the assistant role so that follow-up requests
        # present the conversation history correctly.
        self.new_assistant_message(content=full_response)
        return self.response
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ openai
3
+ pandas
4
+ python-dotenv
subsectors.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Subsector,Definition,Keywords,old_name,Does include,Does not include
2
+ Advanced Manufacturing and Robotics,"In comparison to Traditional Manufacturing, Advanced Manufacturing involves the use of technology to improve products and/or processes, with the relevant technology being described as ""advanced,"" ""innovative,"" or ""cutting edge."""," 3d printing, industrial IoT, internet of things, additive manufacturing, integrated circuit, manufacturing company, machine learning, industrial automation, robotics, predictive maintenance, semiconductor",Advanced Manufacturing and Robotics,"Autonomous driving, Industrial robots, Industrial Drones, 3D printing, Industrial IOT, additive manufacturing, advanced manufacturing, industry 4.0, nano-materials, industrial internet of things, cyber physical systems, cyber manufacturing",Traditional manufacturing machines
3
+ Advertising Technology,"Advertising technology - different types of analytics and digital tools used in the context of advertising and marketing. Extensive and complex systems used to direct advertising to individuals and specific target audiences. It is the use of tools and software advertisers used to reach audiences, deliver and measure digital advertising campaigns.","Conversion/optimization, Email marketing, Mobile marketing, Online & display advertising, Online surveys, tests and panels, Paid search / PPC / AdWords, Search Engine Optimization, Social media & viral marketing, Usability & web design, Web analytics & tracking, Affiliate marketing, B2B digital marketing, ad networks",Adtech,,"Companies whose products and services are not focused on innovation in the advertising and marketing (e.g. all the companies that have advertising or marketing Keywords but actually only have ads on their websites etc.), martech (marketing technology) which focuses on reaching audiences via unpaid or owned channels using a brand’s first-party insights"
4
+ Agriculture Technology,"Technologies to help the agriculture industry to feed our growing population without destroying the planet. AgTech is the use of technology in agriculture, horticulture, and aquaculture with the aim of improving yield, efficiency, and profitability through information monitoring and analysis of weather, pests, soil and air temperature. AgTech also includes the use of automation, such as controlling heaters and irrigation and employing pest control through aerosol pheromone dispersal.AgTech can be products, services or applications derived from agriculture that improve various input/output processes. ",,Agtech,"Precision agriculture - PA is an approach to farm management that uses information technology to ensure that the crops and soil receive exactly what they need for optimum health and productivity. Integrated genetics, Physical inputs, Information technology, Smart machinery, Spectral imaging for remote sensing for monitoring the development and health of crops, data-enabled agriculture, food technology, agricultural robots, new foods, efficient sowing","Farms, Vineyards, Coffee roasters, Beverages"
5
+ Artificial Intelligence; Big Data and Analytics,"Artificial Intelligence companies offer products and services modeled on computer systems that simulate human cognition. These systems can sense their environment, learn, think, and respond to stimuli in a way similar to humans.Big Data and Analytics as a sub-sector refers to companies that provide solutions with the core function of identifying patterns and trends from large volumes / sets of data that cannot be processed by traditional database and analysis software.","Automated intelligence, assisted intelligence, augmented Intelligence, autonomous intelligence, supervised learning, unsupervised learning, adaptive systems, computer vision, natural language processing, natural language generation, text analytics, speech recognition, semantics technology, decision management, virtual agents, robotic process automation, machine learning, autonomous vehicle, smart robotics, autonomous vehicles, facial and image recognition,Advanced algorithms, advanced analytics, advanced visualization, data mining, user behavior analytics, data storytelling, business intelligence","Artificial Intelligence, Big Data and Analytics",,
6
+ Augmented Reality; Virtual Reality,"Virtual reality (VR) is an artificial, computer-generated simulation or recreation of a real life environment or situation. Augmented reality (AR) is a technology that layers computer-generated enhancements atop an existing reality in order to make it more meaningful through the ability to interact with it. ","Mixed Reality, 360 video, frame rate, metaverse, virtual world, cross reality, Artificial intelligence, computer vision",AR / VR,,
7
+ Biopharmaceutical; Biotechonology,"Biopharma is the sub-sector that includes any prescription or non-prescription spending on drugs to treat a disease or a health condition and is regulated by the health authorities. This does not include dietary supplements or any health foods, functional foods, or nutriceuticals. It also includes over-the-counter (OTC) drugs that do not require prescriptions. [6] Biotechnology is the use of living systems and organisms to develop or make products, or ""any technological application that uses biological systems, living organisms, or derivatives thereof, to make or modify products or processes for specific use"" (UN Convention on Biological Diversity, Art. 2). Depending on the tools and applications, it often overlaps with the (related) fields of bioengineering, biomedical engineering, biomanufacturing, molecular engineering, etc.","DNA, Cloning, Genetically designed, Bioengineering, DNA sequencing, biomolecule labeling.",Biopharma / Biotech,"Biotechnology health care products and vaccines, agricultural biotechnology, biorefineries",
8
+ Blockchain,"Companies that develop applications using blockchains, or distributed databases/public ledgers that record transactions and other digital events across a peer-to-peer network. The integrity of each record on a blockchain is cryptographically verified by network participants to create a distributed consensus. Cryptocurrencies and tokens are popular applications of blockchain technology.","Distributed ledgers, Digital Mining,Cryptocurrency, Crypto tokens, Altcoins, Bitcoin, Ethereum, Monero, Crypto Exchanges, Smart Contracts, Initial Coin Offerings, decentralized finance (DeFi) applications, non-fungible tokens (NFTs)",Blockchain,,
9
+ Blue Economy,"Blue economy is the ""sustainable use of ocean resources for economic growth, improved livelihoods, and jobs while preserving the health of the ocean ecosystem. A concept that seeks to promote economic growth, social inclusion and the preservation of livelihoods while at the same time ensuring environmental sustainability of the oceans and coastal areas.","ocean sustainability, aquaculture, seafloor mapping, underwater mapping, Marine Transport, Ports, and Shipbuilding, Commercial Fisheries, Coastal and Marine Tourism, maritime logistics, maritime, inwater imaging, sea/ocean waste management",Blue Economy,,
10
+ Clean Technology,"Cleantech or clean technology is an umbrella term which is used to define technologies which optimize the use of natural resources, produce energy from renewable sources, increase efficiency and productivity, generate less waste and cause less environmental pollution. Cleantech consists of sustainable solutions in the fields of energy, water, transportation, agriculture and manufacturing, including advanced material, smart grids, water treatment, efficient energy storage and distributed energy systems.","Clean energy, and other forms of environmental, and sustainable or green, products and services, advanced material, smart grids, water treatment, efficient energy storage, distributed energy system. sustainable energy, solar panels",Cleantech,,"oil and gas, petrochemicals"
11
+ Cybersecurity,"Cybersecurity is the body of technologies, processes, and practices designed to protect networks, computers, programs, and data from attack, damage, or unauthorized access. For our purposes it includes application security, information security, network security, disaster recovery / business continuity planning, operational security, and end-user education.","cyber security, network security, data security, cloud security, threat detection, mobile security, cyber threat detection",Cybersecurity,"Application security, Information security, Network security, Disaster recovery / business continuity planning, Operational security and End-user education.",
12
+ Digital Media,"Digital media is any media that is encoded in a machine-readable format. Digital media can be created, viewed, distributed, modified and preserved on digital electronics devices. Digitized content (text, graphics, audio, and video) that can be transmitted over internet or computer networks.","digitized information, marketing platform, lead generation, social media marketing, digital advertising, marketing campaign, customer engagement",Digital Media,"Content, Publishing, Blogs, Social Media, Messengers, News, Gaming, Video, Music, Digital Images, Virtual Reality, Augmented Reality, Digital Media Solutions",
13
+ Educational Technology,"Education Technology (also known as EdTech) refers to an area of technology devoted to the development and application of tools (including software, hardware, and processes) intended to promote education. Put another way, “EdTech is a study and ethical practice for facilitating learning and improving performance by creating, using and managing appropriate technological processes and resources.","online education, educational platform, e-learning, online learning, personalized learning, educational software, learning app, digital education",Edtech,,
14
+ Financial Technology,"Describes a business that aims at providing financial services by making use of software and modern technology. [7]Financial technology (Fintech) companies use internet, blockchain and software technologies, as well as algorithms, to offer or facilitate financial services traditionally offered by banks. These services include loans, payments, investments and wealth management. Fintech also includes software that automates financial processes or addresses financial firms’ core business needs","Insurance Tech, Risk Management, Trading, Portfolios, Personal Finance, Banking, Accounting, Digital Currency, Cryptocurrency, data-driven finance, phone banking, robo-advisory, regtech.",Fintech,,"Brick & Mortar banks, Old brick and mortar Insurance companies"
15
+ Gaming,"Development, marketing and monetization of video games and gambling machines / services","gaming platform, video games, online games, gaming studio, social games",Gaming,"PC Gaming, Console Gaming, Mobile Gaming, Smart Gambling Machines, Internet Gambling Services",
16
+ Industry 4.0,"Industry 4.0 is revolutionizing the way companies manufacture, improve and distribute their products. Manufacturers are integrating new technologies, including Internet of Things (IoT), cloud computing and analytics, and AI and machine learning into their production facilities and throughout their operations.These smart factories are equipped with advanced sensors, embedded software and robotics that collect and analyze data and allow for better decision making. Even higher value is created when data from production operations is combined with operational data from ERP, supply chain, customer service and other enterprise systems to create whole new levels of visibility and insight from previously siloed information. These digital technologies lead to increased automation, predictive maintenance, self-optimization of process improvements and, above all, a new level of efficiency and responsiveness to customers not previously possible.","Internet of Things (IoT), Cloud computing, AI and Machine Learning (ML), Edge computing, Cybersecurity, Supply chain, cloud computing and Big Data, Predictive maintenance, Robotics, Sensors, Autonomous Vehicles",Industry 4.0,,
17
+ Medical Technology; Medical devices,"The Medtech sub-sector is primarily focused on designing and manufacturing medical technological equipment, devices, and tools. The definition should exclude distributors and service providers, such as contract research organizations or contract manufacturing organizations . [3] Performing functions like diagnostics,therapeutic devices, treatment and drug delivery.",,Medtech / Medical devices,,
18
+ New Food,"New Food includes technologies that can be leveraged to create efficiency and sustainability in designing, producing, choosing, delivering, and consuming food. Solutions and processes that leverage science and technology to create new types of foods and beverages. This category includes alternative proteins such as plant-based meat, fermentation and cellular agriculture, insects-based products, functional food and drinks as well as meal replacements.","artificial meat, Alternative protein, Plant-based, vegan food, vegan alternative, vegan product, animal protein, clean meat, plant-based meat, cell-based meat, cellular agriculture technology, meat alternative, meat substitute, insect protein, insect-based, probiotic product, fermentation, cannabis-derived products, cannabis-based, CBD-infused, adaptogenic, nootropic, dairy alternative, mushroom food products, mushroom-based",New Food,,"alt- proteins based skincare products, cannabis related skincare of wellness products (by wellness we do not consider products which are not consumed as food or in meal )"