OMilosh committed on
Commit
4144c31
·
1 Parent(s): a5fc965
Files changed (3) hide show
  1. app.py +2 -3
  2. src/config.py +0 -62
  3. src/pipeline.py +1 -21
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import streamlit as st
2
 
3
- from src.config import config
4
  from src.pipeline import predict_sentiment
5
 
6
 
@@ -8,8 +7,8 @@ def main():
8
  st.title("Happy Entity Checker")
9
  raw_text = st.text_area("Enter Text Here", "Type Here")
10
  if st.button("Analyze"):
11
- st.subheader(f"{config.task} {'OMilosh/finetune_model'}")
12
- result = predict_sentiment(raw_text, config.task, "OMilosh/finetune_model")
13
  st.markdown(result, unsafe_allow_html=True)
14
 
15
  if __name__ == '__main__':
 
1
  import streamlit as st
2
 
 
3
  from src.pipeline import predict_sentiment
4
 
5
 
 
7
  st.title("Happy Entity Checker")
8
  raw_text = st.text_area("Enter Text Here", "Type Here")
9
  if st.button("Analyze"):
10
+ st.subheader("Text-classification OMilosh/finetune_model")
11
+ result = predict_sentiment(raw_text, 'text-classification', "OMilosh/finetune_model")
12
  st.markdown(result, unsafe_allow_html=True)
13
 
14
  if __name__ == '__main__':
src/config.py DELETED
@@ -1,62 +0,0 @@
1
- from pathlib import Path
2
- from typing import Optional, Union
3
-
4
- from pydantic import Field
5
- from pydantic_settings import BaseSettings
6
-
7
- from src.constants import PROJECT_PATH
8
-
9
- DATASET_DIR = PROJECT_PATH / "dataset"
10
-
11
-
12
- class DataConfig(BaseSettings):
13
- train_file: Union[Path, str] = DATASET_DIR / "train.parquet"
14
- val_file: Union[Path, str] = DATASET_DIR / "val.parquet"
15
-
16
-
17
- class TrainerConfig(BaseSettings):
18
- output_dir: Path = Field(
19
- PROJECT_PATH / "finetuned_model", description="Directory to save the model"
20
- )
21
- evaluation_strategy: str = Field("epoch", description="Evaluation strategy")
22
- save_strategy: str = Field("epoch", description="Model save strategy")
23
- learning_rate: float = Field(2e-5, description="Learning rate")
24
- per_device_train_batch_size: int = Field(2, description="Batch size for training")
25
- per_device_eval_batch_size: int = Field(2, description="Batch size for evaluation")
26
- num_train_epochs: int = Field(3, description="Number of epochs")
27
- weight_decay: float = Field(0.01, description="Weight decay for optimizer")
28
- logging_dir: Path = Field(PROJECT_PATH / "logs", description="Directory for logs")
29
- logging_steps: int = Field(50, description="Log every X steps")
30
- load_best_model_at_end: bool = Field(
31
- True, description="Load best model at end of training"
32
- )
33
- metric_for_best_model: str = Field(
34
- "eval_loss", description="Metric for best model selection"
35
- )
36
- save_total_limit: int = Field(1, description="Limit for total saved models")
37
- report_to: str = Field("clearml", description="Reporting platform") # Или любой другой логгер, или можно убрать, в этом задании не важно:)
38
- seed: int = Field(42, description="Random seed for reproducibility")
39
-
40
-
41
- class Config(BaseSettings):
42
-
43
- model_name: Optional[str] = "ProsusAI/finbert"
44
- task: Optional[str] = "text-classification"
45
-
46
- # Для задания с датасетом заполните свой username с HF и придумайте имя для датасета
47
- hf_user: Optional[str] = "OMilosh"
48
- hf_dataset_name: Optional[str] = "test_dataset"
49
-
50
- save_directory: Union[Path, str] = PROJECT_PATH / "output"
51
- path_to_example: Union[Path, str] = PROJECT_PATH / "text_example" / "example.txt"
52
-
53
- data_config: DataConfig = DataConfig()
54
- trainer_config: TrainerConfig = TrainerConfig()
55
-
56
- project_name: str = "hw3"
57
- experiment_name: str = "fin_training"
58
- save_model: str | Path = PROJECT_PATH / project_name
59
-
60
-
61
-
62
- config = Config()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/pipeline.py CHANGED
@@ -1,29 +1,9 @@
1
- from pathlib import Path
2
  from transformers import pipeline
3
 
4
- from src.config import config
5
- from src.file_utils import read_from_file, save_to_json
6
- from src.logger import LOGGER
7
-
8
-
9
  def predict_sentiment(text: str, task: str, model: str):
10
  """
11
  Инициализируйте пайплайн и предскажите им текст
12
  """
13
  pipe = pipeline(task, model=model)
14
  pipeline_result = pipe(text)
15
- return pipeline_result
16
-
17
-
18
- if __name__ == "__main__":
19
- input_file = Path(config.path_to_example)
20
- text_data = read_from_file(input_file)
21
- LOGGER.debug("Read data: %s", text_data)
22
-
23
- result = predict_sentiment(text_data, config.task, config.model_name)
24
- LOGGER.debug("Results:\n %s", result)
25
-
26
- output_file = "sentiment_results.json"
27
- path = Path(config.save_directory) / output_file
28
- save_to_json(result, path)
29
- LOGGER.debug("Results saved to: %s", path)
 
 
1
  from transformers import pipeline
2
 
 
 
 
 
 
3
  def predict_sentiment(text: str, task: str, model: str):
4
  """
5
  Инициализируйте пайплайн и предскажите им текст
6
  """
7
  pipe = pipeline(task, model=model)
8
  pipeline_result = pipe(text)
9
+ return pipeline_result