Spaces:

OMilosh
/

finetune_finbert

Sleeping

App Files Files Community

OMilosh committed on Dec 8, 2024

Commit

4144c31

1 Parent(s): a5fc965

fix

Browse files

Files changed (3) hide show

app.py +2 -3
src/config.py +0 -62
src/pipeline.py +1 -21

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import streamlit as st
-from src.config import config
 from src.pipeline import predict_sentiment
@@ -8,8 +7,8 @@ def main():
     st.title("Happy Entity Checker")
     raw_text = st.text_area("Enter Text Here", "Type Here")
     if st.button("Analyze"):
-        st.subheader(f"{config.task} {'OMilosh/finetune_model'}")
-        result = predict_sentiment(raw_text, config.task, "OMilosh/finetune_model")
         st.markdown(result, unsafe_allow_html=True)
 if __name__ == '__main__':

 import streamlit as st
 from src.pipeline import predict_sentiment
     st.title("Happy Entity Checker")
     raw_text = st.text_area("Enter Text Here", "Type Here")
     if st.button("Analyze"):
+        st.subheader("Text-classification OMilosh/finetune_model")
+        result = predict_sentiment(raw_text, 'text-classification', "OMilosh/finetune_model")
         st.markdown(result, unsafe_allow_html=True)
 if __name__ == '__main__':

src/config.py DELETED Viewed

@@ -1,62 +0,0 @@
-from pathlib import Path
-from typing import Optional, Union
-from pydantic import Field
-from pydantic_settings import BaseSettings
-from src.constants import PROJECT_PATH
-DATASET_DIR = PROJECT_PATH / "dataset"
-class DataConfig(BaseSettings):
-    train_file: Union[Path, str] = DATASET_DIR / "train.parquet"
-    val_file: Union[Path, str] = DATASET_DIR / "val.parquet"
-class TrainerConfig(BaseSettings):
-    output_dir: Path = Field(
-        PROJECT_PATH / "finetuned_model", description="Directory to save the model"
-    )
-    evaluation_strategy: str = Field("epoch", description="Evaluation strategy")
-    save_strategy: str = Field("epoch", description="Model save strategy")
-    learning_rate: float = Field(2e-5, description="Learning rate")
-    per_device_train_batch_size: int = Field(2, description="Batch size for training")
-    per_device_eval_batch_size: int = Field(2, description="Batch size for evaluation")
-    num_train_epochs: int = Field(3, description="Number of epochs")
-    weight_decay: float = Field(0.01, description="Weight decay for optimizer")
-    logging_dir: Path = Field(PROJECT_PATH / "logs", description="Directory for logs")
-    logging_steps: int = Field(50, description="Log every X steps")
-    load_best_model_at_end: bool = Field(
-        True, description="Load best model at end of training"
-    )
-    metric_for_best_model: str = Field(
-        "eval_loss", description="Metric for best model selection"
-    )
-    save_total_limit: int = Field(1, description="Limit for total saved models")
-    report_to: str = Field("clearml", description="Reporting platform")  # Или любой другой логгер, или можно убрать, в этом задании не важно:)
-    seed: int = Field(42, description="Random seed for reproducibility")
-class Config(BaseSettings):
-    model_name: Optional[str] = "ProsusAI/finbert"
-    task: Optional[str] = "text-classification"
-    # Для задания с датасетом заполните свой username с HF и придумайте имя для датасета
-    hf_user: Optional[str] = "OMilosh"
-    hf_dataset_name: Optional[str] = "test_dataset"
-    save_directory: Union[Path, str] = PROJECT_PATH / "output"
-    path_to_example: Union[Path, str] = PROJECT_PATH / "text_example" / "example.txt"
-    data_config: DataConfig = DataConfig()
-    trainer_config: TrainerConfig = TrainerConfig()
-    project_name: str = "hw3"
-    experiment_name: str = "fin_training"
-    save_model: str | Path = PROJECT_PATH / project_name
-config = Config()

src/pipeline.py CHANGED Viewed

@@ -1,29 +1,9 @@
-from pathlib import Path
 from transformers import pipeline
-from src.config import config
-from src.file_utils import read_from_file, save_to_json
-from src.logger import LOGGER
 def predict_sentiment(text: str, task: str, model: str):
     """
     Инициализируйте пайплайн и предскажите им текст
     """
     pipe = pipeline(task, model=model)
     pipeline_result = pipe(text)
-    return pipeline_result
-if __name__ == "__main__":
-    input_file = Path(config.path_to_example)
-    text_data = read_from_file(input_file)
-    LOGGER.debug("Read data: %s", text_data)
-    result = predict_sentiment(text_data, config.task, config.model_name)
-    LOGGER.debug("Results:\n %s", result)
-    output_file = "sentiment_results.json"
-    path = Path(config.save_directory) / output_file
-    save_to_json(result, path)
-    LOGGER.debug("Results saved to: %s", path)

 from transformers import pipeline
 def predict_sentiment(text: str, task: str, model: str):
     """
     Инициализируйте пайплайн и предскажите им текст
     """
     pipe = pipeline(task, model=model)
     pipeline_result = pipe(text)
+    return pipeline_result