MTASTE
Collection
Multilingual Target-Aspect-Sentiment Triplet Extraction • 10 items • Updated
YAML Metadata Warning: empty or missing YAML metadata in repo card
Check out the documentation for more information.
!pip install unsloth -q
from unsloth import FastModel
import torch, json, re
# Options handed to Unsloth when loading the checkpoint: the Hub repository
# name, a 2048-token maximum sequence length, and 4-bit weight loading.
_load_options = {
    "model_name": "hienbm/gemma-2-9b-mtaste-16bit",
    "max_seq_length": 2048,
    "load_in_4bit": True,
}

# Load the model together with its tokenizer.
model, tokenizer = FastModel.from_pretrained(**_load_options)

# Switch the loaded model to Unsloth's inference setup before generating.
FastModel.for_inference(model)
# Every aspect label has the form "<PREFIX>#<SUFFIX>". The table below groups
# the suffixes by prefix; dict insertion order and tuple order together fix
# the order of the final list, which is rendered verbatim into INSTRUCTION.
_ASPECT_GROUPS = {
    "AMBIENCE": ("GENERAL",),
    "DRINKS": ("PRICES", "QUALITY", "STYLE_OPTIONS"),
    "FOOD": ("PRICES", "QUALITY", "STYLE_OPTIONS"),
    "LOCATION": ("GENERAL",),
    "RESTAURANT": ("GENERAL", "MISCELLANEOUS", "PRICES"),
    "SERVICE": ("GENERAL",),
}

# The closed set of aspect labels the model is asked to choose from.
ASPECT_CATEGORIES = [
    f"{prefix}#{suffix}"
    for prefix, suffixes in _ASPECT_GROUPS.items()
    for suffix in suffixes
]
# Instruction text that build_prompt() places at the start of every user turn.
# It is a single string assembled from adjacent literals plus the
# comma-joined ASPECT_CATEGORIES list.
# NOTE(review): presumably this wording has to match the prompt the model was
# fine-tuned with -- confirm before changing any literal below.
INSTRUCTION = (
# Task statement and the required reading/extraction order.
"Given a restaurant review, extract all sentiment triplets.\n"
"Read the ENTIRE review first to understand context, sarcasm, and irony.\n"
"Then extract triplets SENTENCE BY SENTENCE in the ORDER they appear.\n\n"
# Output schema: a JSON array of {target, aspect, polarity} objects.
"Output a JSON array sorted by appearance order in the review:\n"
'[{"target": <word/phrase or "NULL">, '
'"aspect": <ASPECT#CATEGORY>, "polarity": <positive|negative|neutral>}]\n\n'
# Closed label set, rendered from ASPECT_CATEGORIES in list order.
"aspect must be one of: " + ", ".join(ASPECT_CATEGORIES) + "\n\n"
# Extraction rules.
"Rules:\n"
"- Sentence order: extract from sentence 1 first, then sentence 2, etc.\n"
"- Multiple triplets per sentence: one object per triplet, keep order\n"
"- target: exact word/phrase from text, or NULL if implicit\n"
"- Output ONLY the JSON array, no explanation\n\n"
# One worked example (review followed by the expected JSON output).
'Example:\nReview: "Food was great. Service was slow."\n'
'Output: [{"target": "food", "aspect": "FOOD#QUALITY", "polarity": "positive"}, '
'{"target": "NULL", "aspect": "SERVICE#GENERAL", "polarity": "negative"}]'
)
def build_prompt(text: str) -> str:
    """Wrap a review in the chat-turn markup expected by the model.

    The user turn holds ``INSTRUCTION``, a blank line, and the review
    prefixed with ``Review: ``. The returned string ends with an opened
    model turn so that generation continues from there.

    Args:
        text: The review to analyse.

    Returns:
        The complete prompt string.
    """
    user_message = f"{INSTRUCTION}\n\nReview: {text}"
    pieces = (
        "<start_of_turn>user\n",
        user_message,
        "<end_of_turn>\n",
        "<start_of_turn>model\n",
    )
    return "".join(pieces)
def parse_output(raw: str) -> list[dict]:
    """Extract the triplet objects from raw model output.

    The text is scanned for a JSON array starting at each ``[`` in turn, and
    the first array that contains at least one JSON object is used. Parsing
    with a real JSON decoder (rather than a non-greedy regex) means brackets
    inside string values, e.g. a target such as ``"pizza [large]"``, do not
    truncate the array, and a bracketed aside before the array (``[1]``) is
    skipped instead of aborting the parse.

    Args:
        raw: Decoded model output, possibly with text around the JSON.

    Returns:
        The JSON objects found, in order. Non-object array items are dropped.
        If no array with objects is present but the whole string is a single
        JSON object, that object is returned as a one-element list. Returns
        an empty list when nothing usable can be parsed.
    """
    decoder = json.JSONDecoder()

    start = raw.find("[")
    while start != -1:
        try:
            # raw_decode parses one JSON value and ignores trailing text.
            candidate, _ = decoder.raw_decode(raw[start:])
        except json.JSONDecodeError:
            candidate = []
        triplets = [item for item in candidate if isinstance(item, dict)]
        if triplets:
            return triplets
        # Either not valid JSON here or an array without objects: try the
        # next opening bracket (this also descends into nested arrays).
        start = raw.find("[", start + 1)

    # No usable array: accept a lone JSON object as a single triplet.
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return []
    return [parsed] if isinstance(parsed, dict) else []
# Demo: run one review through the model and print the extracted triplets.
review = "This place serves fast, it's been over 30 minutes and the dish still hasn't come out."

# Put the input tensors on whatever device the model lives on rather than
# assuming a device name.
inputs = tokenizer(build_prompt(review), return_tensors="pt").to(model.device)

with torch.no_grad():
    # Greedy decoding (do_sample=False). No temperature is passed because it
    # only takes effect when sampling; combining the two is contradictory
    # and only produces a generation-config warning.
    output = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode only the newly generated tokens, i.e. everything after the prompt.
prompt_length = inputs["input_ids"].shape[1]
raw = tokenizer.decode(
    output[0][prompt_length:],
    skip_special_tokens=True,
).strip()

triplets = parse_output(raw)

print(f"Review : {review}\n")
print(f"Raw output : {raw}\n")
print("Extracted triplets:")
for i, t in enumerate(triplets, 1):
    # Model output is not guaranteed to follow the schema, so read fields
    # defensively and coerce the aspect to str for the width format spec.
    if not isinstance(t, dict):
        continue
    target = t.get("target")
    aspect = str(t.get("aspect"))
    polarity = t.get("polarity")
    print(f" {i}. target={target!r:20s} aspect={aspect:30s} polarity={polarity}")