zhayunduo
/

roberta-base-stocktwits-finetuned

@@ -1,42 +1,64 @@
----
-license: apache-2.0
----
-## A project by NUS ISS students Frank Cao, Gerong Zhang, Jiaqi Yao, Sikai Ni, Yunduo Zhang
-This model is fine tuned with roberta-base model on 3200000 comments from stocktwits, with the user labeled tags 'Bullish' or 'Bearish'
-```python
-from transformers import RobertaForSequenceClassification, RobertaTokenizer
-from transformers import pipeline
-import pandas as pd
-# the model was trained upon below preprocessing
-def process_text(texts):
-  # remove URLs
-  texts = re.sub(r'https?://\S+', "", texts)
-  texts = re.sub(r'www.\S+', "", texts)
-  # remove '
-  texts = texts.replace('&#39;', "'")
-  # remove symbol names
-  texts = re.sub(r'(\#)(\S+)', r'hashtag_\2', texts)
-  texts = re.sub(r'(\$)([A-Za-z]+)', r'cashtag_\2', texts)
-  # remove usernames
-  texts = re.sub(r'(\@)(\S+)', r'mention_\2', texts)
-  # demojize
-  texts = emoji.demojize(texts, delimiters=("", " "))
-  return texts.strip()
-tokenizer_loaded = RobertaTokenizer.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')
-model_loaded = RobertaForSequenceClassification.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')
-nlp = pipeline("text-classification", model=model_loaded, tokenizer=tokenizer_loaded)
-sentences = pd.Series(['just buy','just sell it','entity rocket to the sky!','go down','even though it is going up, I still think it will not keep this trend in the near future'])
-# sentences = list(sentences.apply(process_text))
-sentences = list(sentences) # if input text contains https, @ or # or $ symbols, better apply preprocess to get a more accurate result
-results = nlp(sentences)
-print(results) # 2 labels, label 0 is bearish, label 1 is bullish
 ```

+---
+license: apache-2.0
+---
+## **Sentiment inference model for stock-related comments**
+### A project by NUS ISS students Frank Cao, Gerong Zhang, Jiaqi Yao, Sikai Ni, Yunduo Zhang
+<br />
+### Dataset
+This model is fine-tuned from the roberta-base model on 3,200,000 comments from StockTwits, each carrying a user-labeled tag of 'Bullish' or 'Bearish'.
+dataset link:
+<br />
+### Training information
+- batch size 32
+- learning rate 2e-5
+| Epoch       | Train loss  | Validation loss  | Validation accuracy |
+| ----------- | ----------- | ---------------- | ------------------- |
+| epoch1      | 0.3495      | 0.2956           | 0.8679              |
+| epoch2      | 0.2717      | 0.2235           | 0.9021              |
+| epoch3      | 0.2360      | 0.1875           | 0.9210              |
+| epoch4      | 0.2106      | 0.1603           | 0.9343              |
+### How to use
+```python
+import re
+
+import emoji
+import pandas as pd
+from transformers import RobertaForSequenceClassification, RobertaTokenizer
+from transformers import pipeline
+# the model was trained on text preprocessed with the function below
+def process_text(texts):
+  # remove URLs
+  texts = re.sub(r'https?://\S+', "", texts)
+  texts = re.sub(r'www.\S+', "", texts)
+  # remove '
+  texts = texts.replace('&#39;', "'")
+  # remove symbol names
+  texts = re.sub(r'(\#)(\S+)', r'hashtag_\2', texts)
+  texts = re.sub(r'(\$)([A-Za-z]+)', r'cashtag_\2', texts)
+  # remove usernames
+  texts = re.sub(r'(\@)(\S+)', r'mention_\2', texts)
+  # demojize
+  texts = emoji.demojize(texts, delimiters=("", " "))
+  return texts.strip()
+tokenizer_loaded = RobertaTokenizer.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')
+model_loaded = RobertaForSequenceClassification.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')
+nlp = pipeline("text-classification", model=model_loaded, tokenizer=tokenizer_loaded)
+sentences = pd.Series(['just buy','just sell it','entity rocket to the sky!','go down','even though it is going up, I still think it will not keep this trend in the near future'])
+# sentences = list(sentences.apply(process_text))
+sentences = list(sentences) # if input text contains https, @ or # or $ symbols, better apply preprocess to get a more accurate result
+results = nlp(sentences)
+print(results) # 2 labels, label 0 is bearish, label 1 is bullish
 ```