Jiahuita committed on
Commit
b2de734
1 Parent(s): c356db2

Attempt to resolve deployment issue

Browse files
Files changed (4) hide show
  1. README.md +11 -29
  2. app.py +15 -0
  3. pipeline.py +24 -41
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,4 +1,12 @@
1
  ---
 
 
 
 
 
 
 
 
2
  language: en
3
  license: mit
4
  tags:
@@ -43,40 +51,14 @@ This model classifies news headlines as either Fox News or NBC News using an LST
43
 
44
  ## Usage
45
 
46
- You can use this model directly with a FastAPI endpoint:
47
 
48
  ```python
49
  import requests
50
 
51
  # Make a prediction
52
  response = requests.post(
53
- "https://huggingface.co/Jiahuita/NewsSourceClassification/predict",
54
  json={"text": "Your news headline here"}
55
  )
56
- print(response.json())
57
- ```
58
-
59
- Or use it locally:
60
-
61
- ```python
62
- from transformers import pipeline
63
-
64
- classifier = pipeline("text-classification", model="Jiahuita/NewsSourceClassification")
65
- result = classifier("Your news headline here")
66
- print(result)
67
- ```
68
-
69
- ## Limitations and Bias
70
-
71
- This model has been trained on news headlines from specific sources and time periods, which may introduce certain biases. Users should be aware of these limitations when using the model.
72
-
73
- ## Training
74
-
75
- The model was trained using:
76
- - TensorFlow 2.13.0
77
- - LSTM architecture
78
- - Binary cross-entropy loss
79
- - Adam optimizer
80
-
81
- ## License
82
- This project is licensed under the MIT License.
 
1
  ---
2
+ title: News Source Classifier
3
+ emoji: 📰
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: fastapi
7
+ sdk_version: 0.95.2
8
+ app_file: app.py
9
+ pinned: false
10
  language: en
11
  license: mit
12
  tags:
 
51
 
52
  ## Usage
53
 
54
+ You can use this model through the FastAPI endpoint:
55
 
56
  ```python
57
  import requests
58
 
59
  # Make a prediction
60
  response = requests.post(
61
+ "https://huggingface.co/Jiahuita/NewsSourceClassification/predict",
62
  json={"text": "Your news headline here"}
63
  )
64
+ print(response.json())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+
5
# FastAPI application object that exposes the classifier over HTTP.
app = FastAPI()


class TextInput(BaseModel):
    """Request body for ``POST /predict``: one piece of text to classify."""

    text: str


# Built once at import time so every request reuses the same loaded model.
# NOTE(review): model="./" asks transformers to load a stock
# "text-classification" model from the working directory; presumably the
# custom Keras pipeline in pipeline.py was intended instead -- confirm.
classifier = pipeline("text-classification", model="./")


@app.post("/predict")
def predict(input_data: TextInput):
    """Classify ``input_data.text`` and return the classifier's raw output.

    Declared as a plain ``def`` rather than ``async def``: the classifier call
    is synchronous, and FastAPI runs non-async handlers in a worker
    threadpool, so inference does not block the event loop for other
    requests.
    """
    return classifier(input_data.text)
pipeline.py CHANGED
@@ -1,52 +1,35 @@
1
- from transformers import PreTrainedModel, PretrainedConfig
2
- from tensorflow.keras.models import load_model
3
- from tensorflow.keras.preprocessing.text import tokenizer_from_json
4
  from tensorflow.keras.preprocessing.sequence import pad_sequences
5
- import os
6
- import numpy as np
7
  import json
 
8
 
9
- class NewsClassifierConfig(PretrainedConfig):
10
- model_type = "news_classifier"
11
-
12
- def __init__(self, max_length=128, **kwargs):
13
- self.max_length = max_length
14
- super().__init__(**kwargs)
15
 
16
- class NewsClassifier(PreTrainedModel):
17
- config_class = NewsClassifierConfig
18
-
19
- def __init__(self, config):
20
- super().__init__(config)
21
  model_path = os.path.join(os.path.dirname(__file__), 'news_classifier.h5')
 
 
22
  tokenizer_path = os.path.join(os.path.dirname(__file__), 'tokenizer.json')
 
23
 
24
- self.model = load_model(model_path)
25
- with open(tokenizer_path, 'r') as f:
26
- tokenizer_data = json.load(f)
27
- self.tokenizer = tokenizer_from_json(tokenizer_data)
28
 
29
- def forward(self, text_input):
30
- if isinstance(text_input, str):
31
- sequences = self.tokenizer.texts_to_sequences([text_input])
32
- else:
33
- sequences = self.tokenizer.texts_to_sequences(text_input)
34
-
35
- padded = pad_sequences(sequences, maxlen=self.config.max_length)
36
  predictions = self.model.predict(padded)
37
 
38
  results = []
39
- for score in predictions:
40
- label = "foxnews" if score[0] > 0.5 else "nbc"
41
- results.append({
42
- "label": label,
43
- "score": float(score[0] if label == "foxnews" else 1 - score[0])
44
- })
45
-
46
- return results[0] if isinstance(text_input, str) else results
47
-
48
- @classmethod
49
- def from_pretrained(cls, model_path, **kwargs):
50
- config = NewsClassifierConfig.from_pretrained(model_path)
51
- model = cls(config)
52
- return model
 
1
+ from transformers import Pipeline
2
+ import tensorflow as tf
 
3
  from tensorflow.keras.preprocessing.sequence import pad_sequences
 
 
4
  import json
5
+ import os
6
 
7
def load_tokenizer(tokenizer_path):
    """Read the JSON document at ``tokenizer_path`` and return the parsed value.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    with open(tokenizer_path, 'r', encoding='utf-8') as f:
        return json.load(f)
 
 
 
10
 
11
class NewsClassificationPipeline(Pipeline):
    """Classify headlines as ``"foxnews"`` or ``"nbc"`` with a Keras model.

    The model (``news_classifier.h5``) and the tokenizer description
    (``tokenizer.json``) are loaded from the directory containing this module.

    NOTE(review): ``transformers.Pipeline`` declares abstract hooks
    (``_sanitize_parameters``, ``preprocess``, ``_forward``, ``postprocess``)
    and its ``__init__`` expects a model argument; neither is satisfied here,
    so instantiation is expected to fail until that is addressed -- confirm
    against the installed transformers version.
    """

    # Sequence length every input is padded/truncated to before prediction.
    MAX_LENGTH = 128

    def __init__(self, model=None, tokenizer=None, **kwargs):
        """Load the Keras model and tokenizer that sit next to this file.

        ``model`` and ``tokenizer`` are accepted for signature compatibility
        but are not used; only ``kwargs`` is forwarded to the base class.
        """
        super().__init__(**kwargs)

        base_dir = os.path.dirname(__file__)
        self.model = tf.keras.models.load_model(
            os.path.join(base_dir, 'news_classifier.h5')
        )

        self.tokenizer_config = load_tokenizer(
            os.path.join(base_dir, 'tokenizer.json')
        )
        # __call__ needs an object with texts_to_sequences(); the parsed JSON
        # alone is not one, so build the Keras tokenizer from it here.
        self.tokenizer = self._build_tokenizer(self.tokenizer_config)

    @staticmethod
    def _build_tokenizer(tokenizer_config):
        """Turn the parsed contents of tokenizer.json into a Keras tokenizer."""
        # tokenizer_from_json expects a JSON *string*. If the file held a
        # plain JSON object (parsed to a dict), serialize it back first; if it
        # held a JSON-encoded string, it can be passed through unchanged.
        if not isinstance(tokenizer_config, str):
            tokenizer_config = json.dumps(tokenizer_config)
        return tf.keras.preprocessing.text.tokenizer_from_json(tokenizer_config)

    def __call__(self, texts, **kwargs):
        """Predict the news source for one text or a sequence of texts.

        Args:
            texts: a single string, or an iterable of strings.
            **kwargs: accepted for call compatibility; unused.

        Returns:
            For a single string, one ``{"label": ..., "score": ...}`` dict.
            Otherwise a list of such dicts, one per input, in order.
        """
        # Remember the input kind *before* wrapping it, so a single string
        # still yields a single dict at the end.
        single_input = isinstance(texts, str)
        batch = [texts] if single_input else list(texts)
        if not batch:
            return []

        sequences = self.tokenizer.texts_to_sequences(batch)
        padded = pad_sequences(sequences, maxlen=self.MAX_LENGTH)

        predictions = self.model.predict(padded)

        results = []
        for pred in predictions:
            # pred[0] is compared against 0.5 to pick the label; the reported
            # score is pred[0] for "foxnews" and 1 - pred[0] for "nbc".
            label = "foxnews" if pred[0] > 0.5 else "nbc"
            score = float(pred[0] if label == "foxnews" else 1 - pred[0])
            results.append({"label": label, "score": score})

        return results[0] if single_input else results
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -2,3 +2,6 @@ tensorflow>=2.10.0
2
  transformers>=4.46.3
3
  numpy>=1.19.2
4
  scikit-learn>=0.24.2
 
 
 
 
2
  transformers>=4.46.3
3
  numpy>=1.19.2
4
  scikit-learn>=0.24.2
5
+ fastapi>=0.68.0
6
+ uvicorn>=0.15.0
7
+ pydantic>=1.8.2