Spaces:
Configuration error
Configuration error
lightningRalf
committed on
Commit
•
2b127d5
1
Parent(s):
e48753b
Upload 4 files
Browse files- README.md +35 -13
- app.py +35 -0
- requirements.txt +5 -0
- token_counter.log +1 -0
README.md
CHANGED
@@ -1,13 +1,35 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Token Counter
|
2 |
+
|
3 |
+
![Release status](https://img.shields.io/badge/status-beta-blue?label=Release%20Status&style=plastic)
|
4 |
+
![GitHub last commit](https://img.shields.io/github/last-commit/LightningRalf/token_counter)
|
5 |
+
![GitHub repo size](https://img.shields.io/github/repo-size/LightningRalf/token_counter)
|
6 |
+
![GitHub issues](https://img.shields.io/github/issues/LightningRalf/token_counter)
|
7 |
+
![GitHub](https://img.shields.io/github/license/LightningRalf/token_counter)
|
8 |
+
|
9 |
+
Token Counter is a simple Python script that counts the number of tokens in a Markdown file. It's useful for analyzing and processing text data in natural language processing tasks.
|
10 |
+
|
11 |
+
## Installation
|
12 |
+
|
13 |
+
To use Token Counter, simply clone the repository:
|
14 |
+
|
15 |
+
```bash
|
16 |
+
git clone https://github.com/LightningRalf/token_counter.git
|
17 |
+
```
|
18 |
+
|
19 |
+
## Usage
|
20 |
+
|
21 |
+
To count the tokens in a Markdown file, run the `token_counter.py` script with the file path as an argument:
|
22 |
+
|
23 |
+
```bash
|
24 |
+
python token_counter.py path/to/your/markdown_file.md
|
25 |
+
```
|
26 |
+
|
27 |
+
The script will print the token count and also log the results in a log file.
|
28 |
+
|
29 |
+
## Contributing
|
30 |
+
|
31 |
+
We welcome contributions to improve Token Counter! Please feel free to open an issue or submit a pull request if you have any suggestions or improvements.
|
32 |
+
|
33 |
+
## License
|
34 |
+
|
35 |
+
This project is licensed under the CC0-1.0 License - see the [LICENSE](LICENSE) file for details.
|
app.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from transformers import AutoTokenizer
|
3 |
+
import requests
|
4 |
+
import datetime
|
5 |
+
from dateutil.relativedelta import relativedelta
|
6 |
+
|
7 |
+
# Count tokens in a text string using a specified language model.
|
8 |
+
def count_tokens_text(text, model_name='gpt4'):
|
9 |
+
# (same as before)
|
10 |
+
|
11 |
+
# Fetch the most popular models from the last month
|
12 |
+
def get_popular_models():
    """Return model ids from the Hugging Face Hub, sorted by downloads.

    Calls the public ``/api/models`` endpoint and collects the identifier
    of every entry in the response.

    Returns:
        list[str]: Model ids in the order the API returned them. Empty when
        the response contains no usable entries.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    one_month_ago = (datetime.datetime.now() - relativedelta(months=1)).strftime("%Y-%m-%d")
    response = requests.get(
        "https://huggingface.co/api/models",
        params={
            "sort": "downloads",
            # The Hub API treats -1 as descending; other values sort ascending.
            "direction": "-1",
            # NOTE(review): `start_date` does not appear among the documented
            # query parameters and is presumably ignored - confirm or drop.
            "start_date": one_month_ago,
        },
        # Without a timeout a stalled connection would hang the app forever.
        timeout=10,
    )
    response.raise_for_status()
    data = response.json()

    # The endpoint returns a bare JSON array; a {"results": [...]} wrapper is
    # still accepted so the originally assumed shape keeps working.
    entries = data.get("results", []) if isinstance(data, dict) else data

    popular_models = []
    for model in entries:
        if not isinstance(model, dict):
            continue
        model_id = model.get("modelId") or model.get("id")
        if model_id:
            popular_models.append(model_id)
    return popular_models
|
19 |
+
|
20 |
+
# Streamlit app: page title and the text to tokenize.
st.title("Token Counter")
text = st.text_area("Text:", value="", height=200)

# Offer the fetched models in a dropdown; a non-empty manual entry wins.
popular_models = get_popular_models()
dropdown_choice = st.selectbox("Model:", options=popular_models, index=0)
manual_entry = st.text_input("Or enter a model manually:", value="")
model_name = manual_entry if manual_entry else dropdown_choice

# Tokenize only when the button is pressed; the helper's result is unpacked
# as a (token_count, error) pair.
count_clicked = st.button("Count Tokens")
if count_clicked:
    token_count, error = count_tokens_text(text, model_name)
    if token_count is None:
        if error is not None:
            st.error(f"Error: {error}")
    else:
        st.success(f"Token count: {token_count}")
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
transformers
|
3 |
+
requests
|
4 |
+
datetime
|
5 |
+
python-dateutil
|
token_counter.log
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
2023-04-29 10:16:15,841 - INFO - Token count for C:\Users\mjpa\Documents\Obsidian\20-29_Projekte\21_jPAw\21.96_MultiAgentSystem\OBJECTIVE-MAS-GITHUB-basedOnAgentLLM.md: 227
|