Spaces:
Sleeping
Sleeping
fix
Browse files
- app.py +2 -3
- src/config.py +0 -62
- src/pipeline.py +1 -21
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
-
from src.config import config
|
| 4 |
from src.pipeline import predict_sentiment
|
| 5 |
|
| 6 |
|
|
@@ -8,8 +7,8 @@ def main():
|
|
| 8 |
st.title("Happy Entity Checker")
|
| 9 |
raw_text = st.text_area("Enter Text Here", "Type Here")
|
| 10 |
if st.button("Analyze"):
|
| 11 |
-
st.subheader(
|
| 12 |
-
result = predict_sentiment(raw_text,
|
| 13 |
st.markdown(result, unsafe_allow_html=True)
|
| 14 |
|
| 15 |
if __name__ == '__main__':
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
|
|
|
| 3 |
from src.pipeline import predict_sentiment
|
| 4 |
|
| 5 |
|
|
|
|
| 7 |
st.title("Happy Entity Checker")
|
| 8 |
raw_text = st.text_area("Enter Text Here", "Type Here")
|
| 9 |
if st.button("Analyze"):
|
| 10 |
+
st.subheader("Text-classification OMilosh/finetune_model")
|
| 11 |
+
result = predict_sentiment(raw_text, 'text-classification', "OMilosh/finetune_model")
|
| 12 |
st.markdown(result, unsafe_allow_html=True)
|
| 13 |
|
| 14 |
if __name__ == '__main__':
|
src/config.py
DELETED
|
@@ -1,62 +0,0 @@
|
|
| 1 |
-
from pathlib import Path
|
| 2 |
-
from typing import Optional, Union
|
| 3 |
-
|
| 4 |
-
from pydantic import Field
|
| 5 |
-
from pydantic_settings import BaseSettings
|
| 6 |
-
|
| 7 |
-
from src.constants import PROJECT_PATH
|
| 8 |
-
|
| 9 |
-
DATASET_DIR = PROJECT_PATH / "dataset"
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
class DataConfig(BaseSettings):
|
| 13 |
-
train_file: Union[Path, str] = DATASET_DIR / "train.parquet"
|
| 14 |
-
val_file: Union[Path, str] = DATASET_DIR / "val.parquet"
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
class TrainerConfig(BaseSettings):
|
| 18 |
-
output_dir: Path = Field(
|
| 19 |
-
PROJECT_PATH / "finetuned_model", description="Directory to save the model"
|
| 20 |
-
)
|
| 21 |
-
evaluation_strategy: str = Field("epoch", description="Evaluation strategy")
|
| 22 |
-
save_strategy: str = Field("epoch", description="Model save strategy")
|
| 23 |
-
learning_rate: float = Field(2e-5, description="Learning rate")
|
| 24 |
-
per_device_train_batch_size: int = Field(2, description="Batch size for training")
|
| 25 |
-
per_device_eval_batch_size: int = Field(2, description="Batch size for evaluation")
|
| 26 |
-
num_train_epochs: int = Field(3, description="Number of epochs")
|
| 27 |
-
weight_decay: float = Field(0.01, description="Weight decay for optimizer")
|
| 28 |
-
logging_dir: Path = Field(PROJECT_PATH / "logs", description="Directory for logs")
|
| 29 |
-
logging_steps: int = Field(50, description="Log every X steps")
|
| 30 |
-
load_best_model_at_end: bool = Field(
|
| 31 |
-
True, description="Load best model at end of training"
|
| 32 |
-
)
|
| 33 |
-
metric_for_best_model: str = Field(
|
| 34 |
-
"eval_loss", description="Metric for best model selection"
|
| 35 |
-
)
|
| 36 |
-
save_total_limit: int = Field(1, description="Limit for total saved models")
|
| 37 |
-
report_to: str = Field("clearml", description="Reporting platform") # Или любой другой логгер, или можно убрать, в этом задании не важно:)
|
| 38 |
-
seed: int = Field(42, description="Random seed for reproducibility")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
class Config(BaseSettings):
|
| 42 |
-
|
| 43 |
-
model_name: Optional[str] = "ProsusAI/finbert"
|
| 44 |
-
task: Optional[str] = "text-classification"
|
| 45 |
-
|
| 46 |
-
# Для задания с датасетом заполните свой username с HF и придумайте имя для датасета
|
| 47 |
-
hf_user: Optional[str] = "OMilosh"
|
| 48 |
-
hf_dataset_name: Optional[str] = "test_dataset"
|
| 49 |
-
|
| 50 |
-
save_directory: Union[Path, str] = PROJECT_PATH / "output"
|
| 51 |
-
path_to_example: Union[Path, str] = PROJECT_PATH / "text_example" / "example.txt"
|
| 52 |
-
|
| 53 |
-
data_config: DataConfig = DataConfig()
|
| 54 |
-
trainer_config: TrainerConfig = TrainerConfig()
|
| 55 |
-
|
| 56 |
-
project_name: str = "hw3"
|
| 57 |
-
experiment_name: str = "fin_training"
|
| 58 |
-
save_model: str | Path = PROJECT_PATH / project_name
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
config = Config()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/pipeline.py
CHANGED
|
@@ -1,29 +1,9 @@
|
|
| 1 |
-
from pathlib import Path
|
| 2 |
from transformers import pipeline
|
| 3 |
|
| 4 |
-
from src.config import config
|
| 5 |
-
from src.file_utils import read_from_file, save_to_json
|
| 6 |
-
from src.logger import LOGGER
|
| 7 |
-
|
| 8 |
-
|
| 9 |
def predict_sentiment(text: str, task: str, model: str):
|
| 10 |
"""
|
| 11 |
Инициализируйте пайплайн и предскажите им текст
|
| 12 |
"""
|
| 13 |
pipe = pipeline(task, model=model)
|
| 14 |
pipeline_result = pipe(text)
|
| 15 |
-
return pipeline_result
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
if __name__ == "__main__":
|
| 19 |
-
input_file = Path(config.path_to_example)
|
| 20 |
-
text_data = read_from_file(input_file)
|
| 21 |
-
LOGGER.debug("Read data: %s", text_data)
|
| 22 |
-
|
| 23 |
-
result = predict_sentiment(text_data, config.task, config.model_name)
|
| 24 |
-
LOGGER.debug("Results:\n %s", result)
|
| 25 |
-
|
| 26 |
-
output_file = "sentiment_results.json"
|
| 27 |
-
path = Path(config.save_directory) / output_file
|
| 28 |
-
save_to_json(result, path)
|
| 29 |
-
LOGGER.debug("Results saved to: %s", path)
|
|
|
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
def predict_sentiment(text: str, task: str, model: str):
|
| 4 |
"""
|
| 5 |
Инициализируйте пайплайн и предскажите им текст
|
| 6 |
"""
|
| 7 |
pipe = pipeline(task, model=model)
|
| 8 |
pipeline_result = pipe(text)
|
| 9 |
+
return pipeline_result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|