shainaraza commited on
Commit
73cfee4
1 Parent(s): c5aa009

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -4
app.py CHANGED
@@ -13,10 +13,10 @@ class BiasPipeline:
13
  self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
14
  self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
15
 
16
- self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
17
- self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
18
- #self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
19
- #self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
20
 
21
  self.classifier = pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
22
  self.ner = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
@@ -31,6 +31,61 @@ class BiasPipeline:
31
  ner_results = self.ner(texts)
32
  return classification_results, ner_results
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Initialize the BiasPipeline
35
  pipeline = BiasPipeline()
36
 
 
13
  self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
14
  self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
15
 
16
+ #self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
17
+ #self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
18
+ self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
19
+ self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
20
 
21
  self.classifier = pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
22
  self.ner = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
 
31
  ner_results = self.ner(texts)
32
  return classification_results, ner_results
33
 
34
+
35
# --- Debiasing model setup ---
# Llama-2 based debiaser, served through a Hugging Face text-generation pipeline.
debias_model = "newsmediabias/UnBIAS-LLama2-Debiaser-Chat-QLoRA"
debias_tokenizer = AutoTokenizer.from_pretrained(debias_model)
debias_pipeline = transformers.pipeline(
    "text-generation",
    model=debias_model,
    torch_dtype=torch.float16,  # half precision to reduce GPU memory footprint
    device_map="auto",          # let accelerate place layers across available devices
)

# System instruction prepended to every debiasing request.
instruction = (
    "Instruction: As a helpful, respectful and trustworthy debiasing assistant, your "
    "task is to receive a text and return its unbiased version, without adding any unrelated content "
    "or additional outputs."
)
49
+
50
+
51
def get_debiased_sequence(prompt):
    """Generate a debiased version of *prompt* using the debiasing pipeline.

    The prompt is wrapped in the Llama-2 chat template (system instruction
    inside <<SYS>> tags, user text inside [INST] ... [/INST]) before being
    passed to the text-generation pipeline.

    Args:
        prompt: Raw text to debias.

    Returns:
        The cleaned, debiased text; a user-facing error message on CUDA
        out-of-memory; or a fallback message when the pipeline returns no
        sequences.

    Raises:
        RuntimeError: Re-raised unchanged for any runtime failure other
            than CUDA out-of-memory.
    """
    # Llama-2 chat template: system instruction + single user turn.
    instruction_prefix = f"<s> <<SYS>> {instruction} <</SYS>> [INST]"
    instruction_suffix = "[/INST]</s>"
    full_input_text = f"{instruction_prefix}{prompt}{instruction_suffix}"

    # Size max_length from the actual input so generation is never cut off
    # before it starts; the +50 buffer leaves room for the generated output.
    input_tokens = debias_tokenizer.encode(full_input_text)
    max_length = len(input_tokens) + 50

    try:
        sequences = debias_pipeline(
            full_input_text,
            do_sample=True,
            top_k=10,
            num_return_sequences=1,
            eos_token_id=debias_tokenizer.eos_token_id,
            max_length=max_length,
        )
        if sequences:
            generated = sequences[0]['generated_text']
            # The pipeline echoes the prompt; keep only the text after the
            # final [/INST] tag.
            result_part = generated.split('[/INST]')[-1]
            # Strip non-printable characters the model occasionally emits.
            clean_result = ''.join(c for c in result_part if c.isprintable())
            return clean_result.strip()
    except RuntimeError as e:
        if 'CUDA out of memory' in str(e):
            torch.cuda.empty_cache()  # best-effort recovery for the next request
            return "Error: Out of memory. Please try again with shorter input or less complex instructions."
        raise  # bare raise preserves the original traceback (unlike `raise e`)

    return "No output generated. Check model configuration or input."
87
+
88
+
89
# Instantiate the shared BiasPipeline used by the app.
# NOTE(review): this rebinds the module-level name `pipeline`, shadowing the
# transformers `pipeline` factory that BiasPipeline.__init__ calls — confirm
# no later code expects the factory under this name.
pipeline = BiasPipeline()
91