| import streamlit as st |
| from datasets import load_dataset |
|
|
| |
# Load the contributor CSV once per session. Streamlit re-executes the whole
# script on every widget interaction, so without caching the CSV would be
# re-parsed on every keystroke/rerun.
@st.cache_resource
def _load_contributor_dataset():
    """Return the CCI details dataset loaded from the local CSV file."""
    return load_dataset('csv', data_files='CCI_Details_Structured_Full.csv')


dataset = _load_contributor_dataset()

st.title('Contributor Search App')
st.write('Enter the name of a Contributor to search in the dataset.')

contributor_query = st.text_input('Enter Contributor to search:')

if contributor_query:
    # Case-insensitive exact match against the 'Contributor' column;
    # rows with a missing/empty Contributor value are skipped.
    results = [
        example for example in dataset['train']
        if example.get('Contributor')
        and example['Contributor'].lower() == contributor_query.lower()
    ]
    st.write(results)
|
|
| from transformers import T5ForConditionalGeneration, T5Tokenizer |
| from datasets import load_dataset |
|
|
| |
# Fine-tuned T5 checkpoint hosted on the Hugging Face Hub.
model_name = "Lexim011/NISTER"

# Load the tokenizer and the seq2seq model from the same checkpoint so the
# vocabulary and the model weights are guaranteed to match.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
|
| |
# Load the compliance dataset from the Hugging Face Hub.
# NOTE(review): the original script called load_dataset("Lexim011/Compliance")
# twice back to back; the second, identical call was redundant and is removed.
dataset = load_dataset("Lexim011/Compliance")
|
|
| |
def generate_answer(question, context):
    """Generate an answer for *question* grounded in *context* using the T5 model.

    Parameters
    ----------
    question : str
        The question text (in this script, a control Definition).
    context : str
        Supporting text the model should condition on (the References field).

    Returns
    -------
    str
        The decoded model output with special tokens removed.
    """
    # T5 question-answering prompt format. The tokenizer appends the EOS
    # token itself, so the manual "</s>" suffix from the original prompt is
    # unnecessary (it produced a doubled EOS) and has been dropped.
    input_text = f"question: {question} context: {context}"
    # truncation=True keeps the prompt within the model's 512-token input
    # limit instead of failing or degrading on long References text.
    input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True)

    # max_new_tokens lifts the default generation cap (max_length=20), which
    # silently cut longer answers short.
    outputs = model.generate(input_ids, max_new_tokens=64)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer
|
|
| |
| |
# Run the QA model over every training example and print each result.
for record in dataset['train']:
    question_text = record['Definition']
    reference_text = record['References']

    model_answer = generate_answer(question_text, reference_text)

    print(f"Definition: {question_text}")
    print(f"References: {reference_text}")
    print(f"Answer: {model_answer}")
    print("---")
|
|