premmukund committed on
Commit
4f49ea4
1 Parent(s): 282546b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.corpus import wordnet
3
+ import streamlit as st
4
+
5
# Download the NLTK resources this app needs.
# Both the legacy ids ('punkt', 'averaged_perceptron_tagger') and the ids used
# by newer NLTK releases ('punkt_tab', 'averaged_perceptron_tagger_eng') are
# requested so tokenising and tagging work regardless of the installed NLTK
# version. quiet=True keeps the downloader from printing progress output each
# time the script is run.
for _resource in (
    'punkt',
    'punkt_tab',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(_resource, quiet=True)
9
+
10
def _wordnet_pos(tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS, or None.

    Only adjectives, verbs, nouns and adverbs are mapped; every other tag
    (determiners, pronouns, punctuation, ...) yields None.
    """
    if tag.startswith('JJ'):
        return wordnet.ADJ
    if tag.startswith('VB'):
        return wordnet.VERB
    if tag.startswith('NN'):
        return wordnet.NOUN
    if tag.startswith('RB'):
        return wordnet.ADV
    return None


def _substitute(token, tag):
    """Return a WordNet synonym for *token*, or *token* itself if none applies."""
    pos = _wordnet_pos(tag)
    if pos is None:
        # Not a content word: leave it alone instead of looking it up, so
        # tokens such as "a" or "I" are not swapped for unrelated senses.
        return token

    synsets = wordnet.synsets(token, pos=pos)
    if not synsets:
        return token

    # Take the first lemma of the first synset for the tagged part of speech.
    synonym = synsets[0].lemmas()[0].name()
    if synonym.lower() == token.lower():
        # The lemma is the word itself; nothing to replace.
        return token

    # WordNet joins multi-word lemmas with underscores.
    return synonym.replace('_', ' ')


def rewrite_content(text):
    """Rewrite *text* by replacing words with WordNet synonyms.

    The text is split on newlines and each paragraph is tokenised, POS-tagged
    and rewritten on its own, so every rewritten token stays in the paragraph
    it came from. Tokens are joined with single spaces and paragraphs are
    re-joined with newlines.

    Args:
        text: The input string; may contain several newline-separated
            paragraphs.

    Returns:
        The rewritten string with the same number of newline-separated
        paragraphs as the input.
    """
    rewritten_paragraphs = []
    for paragraph in text.split('\n'):
        tokens = nltk.word_tokenize(paragraph)
        # Skip tagging for blank lines; they are kept as empty paragraphs.
        tagged_tokens = nltk.pos_tag(tokens) if tokens else []
        rewritten_paragraphs.append(
            ' '.join(_substitute(token, tag) for token, tag in tagged_tokens)
        )

    return '\n'.join(rewritten_paragraphs)
48
+
49
# Create a Streamlit interface
# Single source of truth for the input size limit shown and enforced below.
MAX_WORDS = 200000

st.title("Text Rewriter")
st.write(f"Note: The rewrite content limit is {MAX_WORDS} words.")

input_text = st.text_area("Enter text to rewrite", height=300)

# Count tokens in the input text (word_tokenize also counts punctuation tokens)
word_count = len(nltk.word_tokenize(input_text))

# Display the current word count
st.write(f"Word count: {word_count} / {MAX_WORDS}")

if st.button("Rewrite"):
    if not input_text.strip():
        # Treat whitespace-only input the same as empty input.
        st.error("Please enter some text.")
    elif word_count > MAX_WORDS:
        st.error(f"The text exceeds {MAX_WORDS} words. Please enter fewer words.")
    else:
        rewritten_text = rewrite_content(input_text)
        st.subheader("Rewritten Text")
        # Widgets should not have an empty label; give it a real one and hide
        # it, since the subheader above already titles the output.
        st.text_area(
            "Rewritten text",
            rewritten_text,
            height=300,
            label_visibility="collapsed",
        )