{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import gradio as gr\n", "import pandas as pd\n", "from transformers import TapasTokenizer, TapasForQuestionAnswering\n", "from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering\n", "from transformers import pipeline\n", "import PIL\n", "\n", "# read the module_guide_tableQA\\0915NC_Studienplaetze.jpg as pil image\n", "pil_image = PIL.Image.open(\"0915NC_Studienplaetze.jpg\")\n", "# make that image a bit less high\n", "pil_image = pil_image.resize((int(pil_image.width * 0.5), int(pil_image.height * 0.5)))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_answer(\n", " dropdown,\n", " question,\n", " view_as_table=False,\n", " model=\"google/tapas-finetuned-wtq\",\n", " #progress=gr.Progress(),\n", "):\n", " #progress(0, desc=\"Looking for answer in module guide...\")\n", " df = pd.DataFrame()\n", " if dropdown == \"Master Information Systems\":\n", " df = pd.read_excel(\n", " r\"03_extracted_final_modules\\MS_IS_all_modules_orginal_15_rows_cleaned.xlsx\"\n", " )\n", " elif dropdown == \"Bachelor Information Systems\":\n", " df = pd.read_excel(r\"03_extracted_final_modules\\BA_IS_all_modules_15.xlsx\")\n", " elif dropdown == \"Bachelor Management\":\n", " df = pd.read_excel(r\"03_extracted_final_modules\\BA_MM_all_modules_15.xlsx\")\n", " df = df.astype(str)\n", " print(question)\n", " question = str(question)\n", " print(df.shape)\n", " question = [question]\n", "\n", " if model == \"google/tapas-finetuned-wtq\":\n", " tqa = pipeline(\n", " task=\"table-question-answering\", model=\"google/tapas-base-finetuned-wtq\"\n", " )\n", " elif model == \"google/tapas-large-finetuned-wtq\":\n", " tqa = pipeline(\n", " task=\"table-question-answering\", model=\"google/tapas-large-finetuned-wtq\"\n", " )\n", "\n", " results = tqa(table=df, query=question)\n", " print(results)\n", " cells_input = results[\"cells\"]\n", " cells_input = str(cells_input)\n", " cells_input = cells_input.replace(\"[\", \"\")\n", " cells_input = cells_input.replace(\"]\", \"\")\n", " cells_input = cells_input.replace(\"'\", \"\")\n", "\n", " print(cells_input)\n", " print(results)\n", " html_string_short = f\"
{cells_input}
\"\n", " row_numbers = [coord[0] for coord in results[\"coordinates\"]]\n", " df_short = df.iloc[row_numbers]\n", " df_short = df_short.dropna(axis=1, how=\"all\")\n", " df_short = df_short.loc[:, (df_short != \"--\").any(axis=0)]\n", " html_table = (\n", " f\"{df_short.to_html(index=False)}
\"\n", " )\n", "\n", " # check if there are more than 1 rows in df_short\n", " html_string = \"\"\n", " if df_short.shape[0] > 1 or view_as_table == True:\n", " html_string = html_table\n", " elif df_short.shape[0] == 1:\n", " html_string = \"\"\"\n", " \n", " \n", " \n", " \n", "Project Seminar
\n", "12-PS-192-m01
\n", "Faculty of Business Management and Economics
\n", "Holder of the Chair of Business Management and Business
\n", "15
\n", "numerical grade
\n", "1 semester
\n", "graduate
\n", "In small project teams of 4 to 10 members, students will spend several months actively working on a specific and realistic problem with practical relevance. They will progress through several project stages including as-is analysis, to-be conception and implementation of an IS solution. The project teams will be required to work independently and will only receive advice and minor support from research assistants.
\n", "Project: preparing a conceptual design (approx. 150 hours), designing and implementing an approach to solution (approx. 300 hours) as well as presentation (approx. 20 minutes), weighted 1:2:1
\n", "Language of assessment: German, English
\n", "Creditable for bonus
\n", "450 hours
\n", "View complete pdf here: {dropdown_item}
Ask whatever you want to know about the module guide here. You can ask formality-based and content-based questions.
'\n", " elif dropdown_item == \"Bachelor Information Systems\":\n", " html_link = f'View complete pdf here: {dropdown_item}Ask whatever you want to know about the module guide here. You can ask formality-based and content-based questions.
'\n", " elif dropdown_item == \"Bachelor Management\":\n", " html_link = f'View complete pdf here: {dropdown_item}Ask whatever you want to know about the module guide here. You can ask formality-based and content-based questions.
'\n", "\n", " return html_link" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7867\n", "Running on public URL: https://a7a23badcaa31f041e.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "" ], "text/plain": [ "View complete PDF here: Master Information Systems
\n", "Ask whatever you want to know about the module guide here. You can ask formality-based and content-based questions.
\n", " \"\"\"\n", " )\n", "\n", " table.change(change_html_link, table, html_link)\n", " question = gr.Textbox(\n", " label=\"Question\", value=\"How many ECTS credits does the project seminar have?\"\n", " )\n", " with gr.Accordion(\"Advanced Options\", open=False):\n", " with gr.Group():\n", " model_selction = gr.Dropdown(\n", " [\n", " \"google/tapas-finetuned-wtq\",\n", " \"google/tapas-large-finetuned-wtq\",\n", " ],\n", " label=\"Select Model\",\n", " value=\"google/tapas-finetuned-wtq\",\n", " )\n", " view_as_table_or_text = gr.Checkbox(\n", " label=\"View detailed information as table\", value=False\n", " )\n", "\n", " ask_btn = gr.Button(\"Ask The Assistant\")\n", " gr.HTML(\"