edithram23 commited on
Commit
508f4e9
1 Parent(s): 3dfc26c
Files changed (4) hide show
  1. Dockerfile +27 -0
  2. README.md +11 -11
  3. main.py +39 -0
  4. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.10.9

# Create an unprivileged user with a home directory (uid 1000 matches the
# uid Hugging Face Spaces runs containers as).
RUN useradd -m -u 1000 user

# Run everything below as that user.
USER user

# TRANSFORMERS_CACHE is deprecated in recent transformers releases in favor
# of HF_HOME; set both so either library version finds a writable cache.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/.cache \
    TRANSFORMERS_CACHE=/home/user/.cache

# Ensure the cache directory exists.  It is created by (and owned by) `user`,
# so default permissions suffice; the previous `chmod -R 777` was unnecessary.
RUN mkdir -p $HOME/.cache

# Set the working directory for the application.
WORKDIR $HOME/Redaction

# Copy the project in, owned by the runtime user.
COPY --chown=user . $HOME/Redaction

# Install dependencies into the user's local site-packages.
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Launch the FastAPI app on the port Spaces expects (7860).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,11 @@
1
- ---
2
- title: Model
3
- emoji: 📚
4
- colorFrom: yellow
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- license: other
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Redaction
3
+ emoji: 🔥
4
+ colorFrom: indigo
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
main.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

# Respect the cache location provided by the environment: the Dockerfile sets
# TRANSFORMERS_CACHE=/home/user/.cache, which is writable by the container
# user.  Unconditionally overriding it (as before) pointed the cache at
# "/.cache" on the root filesystem, which the non-root user cannot write.
# Only fall back to "/.cache" when nothing is configured.
os.environ.setdefault("TRANSFORMERS_CACHE", "/.cache")

# NOTE: the environment variable must be set before transformers is imported,
# hence the late imports below.
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face Hub model fine-tuned for redaction mask generation.
model_dir = 'edithram23/Redaction'
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
def mask_generation(text):
    """Run the seq2seq redaction model on *text* and normalize its output.

    The model emits bracketed spans (e.g. "[NAME]") around sensitive
    content; every such span is collapsed to the literal "[redacted]".

    Parameters:
        text: the input string to redact.

    Returns:
        The model's decoded output with every "[...]" span replaced by
        "[redacted]".
    """
    # Prefix with the task instruction the model was fine-tuned on.
    batch = tokenizer(["Mask Generation: " + text],
                      max_length=500, truncation=True, return_tensors="pt")
    # max_length is derived from the character count — a rough upper bound
    # on the masked output's token count.  NOTE(review): do_sample=True
    # makes generation non-deterministic; confirm that is intended.
    generated = model.generate(**batch, num_beams=8, do_sample=True,
                               max_length=len(text) + 10)
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
    # Replace every bracketed span with the fixed marker.  `re` is already
    # imported at module level; the previous function-local re-import was
    # redundant and has been removed.
    return re.sub(r'\[.*?\]', '[redacted]', decoded.strip())
from fastapi import FastAPI
import uvicorn

app = FastAPI()


@app.get("/")
async def hello():
    """Liveness probe: confirms the service is up."""
    return {"msg" : "Live"}


@app.post("/mask")
async def mask_input(query: str):
    """Redact sensitive spans in *query* and return the masked text.

    Annotating ``query: str`` lets FastAPI validate the parameter and
    document it in the generated OpenAPI schema (the untyped parameter was
    treated as a required string anyway, so this is backward-compatible).
    """
    output = mask_generation(query)
    return {"data" : output}


if __name__ == '__main__':
    # TRANSFORMERS_CACHE is configured at the top of the module *before*
    # transformers is imported; re-setting it here had no effect and was
    # removed.
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True, workers=1)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ transformers==4.41.2
3
+ uvicorn==0.30.1
4
+ torch