parth parekh committed
Commit 7e63028
working demo
- .gitattributes +35 -0
- .gitignore +1 -0
- Dockerfile +28 -0
- README.md +10 -0
- __pycache__/app.cpython-312.pyc +0 -0
- __pycache__/predictor.cpython-312.pyc +0 -0
- __pycache__/test.cpython-312.pyc +0 -0
- app.py +100 -0
- contact_sharing_epoch_1.pth +3 -0
- load_test.py +67 -0
- predictor.py +128 -0
- requirements.txt +70 -0
- test.py +166 -0
- vocab.pth +3 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1 @@
+.venv
Dockerfile
ADDED
@@ -0,0 +1,28 @@
+FROM python:3.12-slim
+
+# Create a new user
+RUN useradd -m user
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libgl1-mesa-glx \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+# Change ownership of the /app directory to the new user
+RUN chown -R user:user /app
+
+# Switch to the new user
+USER user
+
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "4"]
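
The Dockerfile serves the FastAPI app with uvicorn on port 7860 as a non-root user. For quick local debugging without building the image, a minimal sketch (assuming the packages from requirements.txt, including the pinned en_core_web_sm wheel, are installed in the active environment):

import uvicorn

if __name__ == "__main__":
    # Mirrors the container CMD above; a single worker keeps debugging simple.
    uvicorn.run("app:app", host="0.0.0.0", port=7860)
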
README.md
ADDED
@@ -0,0 +1,10 @@
+---
+title: Contact Sharing Recognizer API
+emoji: 🤙
+colorFrom: indigo
+colorTo: pink
+sdk: docker
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/app.cpython-312.pyc
ADDED
Binary file (4.43 kB)
__pycache__/predictor.cpython-312.pyc
ADDED
Binary file (9.06 kB)
__pycache__/test.cpython-312.pyc
ADDED
Binary file (11.6 kB)
app.py
ADDED
@@ -0,0 +1,100 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import torch
+from torch.nn.functional import softmax
+import re
+from predictor import predict, batch_predict  # Assuming batch_predict is in predictor module
+
+app = FastAPI(
+    title="Contact Information Detection API",
+    description="API for detecting contact information in text, great thanks to xxparthparekhxx/ContactShieldAI for the model",
+    version="1.0.0",
+    docs_url="/"
+)
+
+def preprocess_text(text):
+    # Remove all punctuation except for @ and . which are often used in email addresses
+    return re.sub(r'[^\w\s@.]', '', text)
+
+class TextInput(BaseModel):
+    text: str
+
+class BatchTextInput(BaseModel):
+    texts: list[str]
+
+def check_regex_patterns(text):
+    patterns = [
+        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',  # Email
+        r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',  # Phone number
+        r'\b\d{5}(?:[-\s]\d{4})?\b',  # ZIP code
+        r'\b\d+\s+[\w\s]+(?:street|st|avenue|ave|road|rd|highway|hwy|square|sq|trail|trl|drive|dr|court|ct|park|parkway|pkwy|circle|cir|boulevard|blvd)\b\s*(?:[a-z]+\s*\d{1,3})?(?:,\s*(?:apt|bldg|dept|fl|hngr|lot|pier|rm|ste|unit|#)\s*[a-z0-9-]+)?(?:,\s*[a-z]+\s*[a-z]{2}\s*\d{5}(?:-\d{4})?)?',  # Street address
+        r'(?:http|https)://(?:www\.)?[a-zA-Z0-9-]+\.[a-zA-Z]{2,}(?:/[^\s]*)?'  # Website URL
+    ]
+
+    for pattern in patterns:
+        if re.search(pattern, text, re.IGNORECASE):
+            return True
+    return False
+
+@app.post("/detect_contact", summary="Detect contact information in text")
+async def detect_contact(input: TextInput):
+    try:
+        preprocessed_text = preprocess_text(input.text)
+
+        # First, check with regex patterns
+        if check_regex_patterns(preprocessed_text):
+            return {
+                "text": input.text,
+                "is_contact_info": True,
+                "method": "regex"
+            }
+
+        # If no regex patterns match, use the model
+        is_contact = predict(preprocessed_text)
+        return {
+            "text": input.text,
+            "is_contact_info": is_contact == 1,
+            "method": "model"
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/batch_detect_contact", summary="Detect contact information in batch of texts")
+async def batch_detect_contact(inputs: BatchTextInput):
+    try:
+        # Preprocess all texts
+        preprocessed_texts = [preprocess_text(text) for text in inputs.texts]
+
+        # First, use regex to check patterns
+        regex_results = [check_regex_patterns(text) for text in preprocessed_texts]
+
+        # For texts where regex doesn't detect anything, use the model
+        texts_for_model = [text for text, regex_match in zip(preprocessed_texts, regex_results) if not regex_match]
+        if texts_for_model:
+            model_results = batch_predict(texts_for_model)
+        else:
+            model_results = []
+
+        # Prepare final results
+        results = []
+        model_idx = 0
+        for i, text in enumerate(preprocessed_texts):
+            if regex_results[i]:
+                results.append({
+                    "text": inputs.texts[i],
+                    "is_contact_info": True,
+                    "method": "regex"
+                })
+            else:
+                is_contact = model_results[model_idx]
+                results.append({
+                    "text": inputs.texts[i],
+                    "is_contact_info": bool(is_contact),  # Convert numpy bool
+                    "method": "model"
+                })
+                model_idx += 1
+
+        return results
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
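
app.py exposes two POST endpoints: /detect_contact accepts {"text": ...} and /batch_detect_contact accepts {"texts": [...]}; both run the regex patterns first and fall back to the model, returning the original text, an is_contact_info flag, and the method that decided it. A minimal client sketch using httpx (already pinned in requirements.txt), assuming the service is reachable on localhost:7860 as in the Dockerfile CMD:

import httpx

BASE_URL = "http://localhost:7860"  # assumption: local container or uvicorn; use the deployed Space URL otherwise

with httpx.Client(base_url=BASE_URL, timeout=30.0) as client:
    # Single-text endpoint
    single = client.post("/detect_contact", json={"text": "Email me at jane.doe@example.com"})
    single.raise_for_status()
    print(single.json())  # e.g. {"text": "...", "is_contact_info": true, "method": "regex"}

    # Batch endpoint
    batch = client.post(
        "/batch_detect_contact",
        json={"texts": ["Call me at 555-123-4567", "The weather is lovely today"]},
    )
    batch.raise_for_status()
    for item in batch.json():
        print(item["method"], item["is_contact_info"], item["text"])
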
contact_sharing_epoch_1.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdb70e711c212856ce3df95b82afbae57b8fc34243b3f541ecd65963fa81fd92
+size 813497259
load_test.py
ADDED
@@ -0,0 +1,67 @@
+import asyncio
+import aiohttp
+import json
+from tqdm.asyncio import tqdm
+import time
+from test import test_texts
+
+url = "https://vidhitmakvana1-contact-sharing-recognizer-api.hf.space/detect_contact"
+concurrent_requests = 2
+
+async def process_text(session, text, semaphore):
+    payload = {"text": text}
+    headers = {"Content-Type": "application/json"}
+
+    async with semaphore:
+        start_time = time.time()
+        while True:
+            async with session.post(url, data=json.dumps(payload), headers=headers) as response:
+                if response.status == 200:
+                    result = await response.json()
+                    end_time = time.time()
+                    result['response_time'] = end_time - start_time
+                    return result
+                elif response.status == 429:
+                    print(f"Rate limit exceeded. Waiting for 60 seconds before retrying...")
+                    await asyncio.sleep(60)
+                else:
+                    print(f"Error for text: {text}")
+                    print(f"Status code: {response.status}")
+                    print(f"Response: {await response.text()}")
+                    return None
+
+async def main():
+    semaphore = asyncio.Semaphore(concurrent_requests)
+    async with aiohttp.ClientSession() as session:
+        tasks = [process_text(session, text, semaphore) for text in [*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts,*test_texts]]
+        results = await tqdm.gather(*tasks)
+
+    correct_predictions = 0
+    total_predictions = len(results)
+    total_response_time = 0
+
+    for text, result in zip(test_texts, results):
+        if result:
+            print(f"Text: {result['text']}")
+            print(f"Contact Probability: {result['contact_probability']:.4f}")
+            print(f"Is Contact Info: {result['is_contact_info']}")
+            print(f"Response Time: {result['response_time']:.4f} seconds")
+            print("---")
+
+            if result['is_contact_info']:
+                correct_predictions += 1
+
+            total_response_time += result['response_time']
+
+    accuracy = correct_predictions / (total_predictions * 37)
+    average_response_time = total_response_time / total_predictions
+    print(f"Accuracy: {accuracy:.2f}")
+    print(f"Average Response Time: {average_response_time:.4f} seconds")
+
+if __name__ == "__main__":
+    while True:
+        start_time = time.time()
+        asyncio.run(main())
+        end_time = time.time()
+        total_time = end_time - start_time
+        print(f"\nTotal execution time: {total_time:.2f} seconds")
predictor.py
ADDED
@@ -0,0 +1,128 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchtext.vocab import build_vocab_from_iterator, GloVe
+from torchtext.data.utils import get_tokenizer
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+class ContactSharingClassifier(nn.Module):
+    def __init__(self, vocab_size, embed_dim, num_filters, filter_sizes, lstm_hidden_dim, output_dim, dropout, pad_idx):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
+        self.lstm = nn.LSTM(embed_dim, lstm_hidden_dim, bidirectional=True, batch_first=True)
+        self.convs = nn.ModuleList([
+            nn.Conv1d(in_channels=lstm_hidden_dim*2, out_channels=num_filters, kernel_size=fs)
+            for fs in filter_sizes
+        ])
+        self.fc1 = nn.Linear(len(filter_sizes) * num_filters, len(filter_sizes) * num_filters // 2)
+        self.fc2 = nn.Linear(len(filter_sizes) * num_filters // 2, output_dim)
+        self.dropout = nn.Dropout(dropout)
+        self.layer_norm = nn.LayerNorm(len(filter_sizes) * num_filters)
+
+    def forward(self, text):
+        embedded = self.embedding(text)
+        lstm_out, _ = self.lstm(embedded)
+        lstm_out = lstm_out.permute(0, 2, 1)
+        conved = [F.relu(conv(lstm_out)) for conv in self.convs]
+        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
+        cat = self.dropout(torch.cat(pooled, dim=1))
+        cat = self.layer_norm(cat)
+        x = F.relu(self.fc1(cat))
+        x = self.dropout(x)
+        return self.fc2(x)
+
+# Initialize tokenizer and vocabulary
+tokenizer = get_tokenizer("spacy", language="en_core_web_sm")
+vocab = torch.load('vocab.pth')  # Assuming you've saved the vocabulary
+
+# Define text pipeline
+def text_pipeline(x):
+    return [vocab[token] for token in tokenizer(x)]
+
+# Model parameters
+VOCAB_SIZE = len(vocab)
+EMBED_DIM = 600
+NUM_FILTERS = 600
+FILTER_SIZES = [3, 4, 5, 6, 7, 8, 9, 10]
+LSTM_HIDDEN_DIM = 768
+OUTPUT_DIM = 2
+DROPOUT = 0.5
+PAD_IDX = vocab["<pad>"]
+
+# Load the model
+model = ContactSharingClassifier(VOCAB_SIZE, EMBED_DIM, NUM_FILTERS, FILTER_SIZES, LSTM_HIDDEN_DIM, OUTPUT_DIM, DROPOUT, PAD_IDX)
+model.load_state_dict(torch.load('contact_sharing_epoch_1.pth', map_location=device))
+model.to(device)
+model.eval()
+
+# Test sentences
+test_sentences = [
+    "You can reach me at my electronic mail address, it's my first name dot last name at that popular search engine company's mail service.",
+    "Call me on my cellular device, the digits are the same as the year the Declaration of Independence was signed, followed by my birth year, twice.",
+    "Visit my online presence at triple w dot my full name without spaces or punctuation dot com.",
+    "Send a message to username 'not_my_real_name' on that instant messaging platform that starts with 'disc' and ends with 'ord'.",
+    "My contact info is hidden in this sentence: Eight Six Seven Five Three Oh Nine.",
+    "Find me on the professional networking site, just search for my name plus 'software engineer in San Francisco'.",
+    "My handle on the bird-themed social media platform is at symbol followed by 'definitely_not_my_email_address'.",
+    "You know that video sharing site? My channel is there, just add 'cool_coder_' before my full name, all lowercase.",
+    "I'm listed in the phone book under 'Smith, John' but replace 'Smith' with my actual last name and 'John' with my first name.",
+    "My contact details are encrypted: Rot13('zl.rznvy@tznvy.pbz')",
+
+    # New non-contact sharing examples
+    "The weather today is absolutely beautiful, perfect for a picnic in the park.",
+    "I'm really excited about the new sci-fi movie coming out next month.",
+    "Did you hear about the latest advancements in artificial intelligence? It's fascinating!",
+    "I'm planning to go hiking this weekend in the nearby mountains.",
+    "The recipe calls for two cups of flour and a pinch of salt.",
+    "The annual tech conference will be held virtually this year due to ongoing health concerns.",
+    "I've been learning to play the guitar for the past six months. It's challenging but rewarding.",
+    "The local farmer's market has the freshest produce every Saturday morning.",
+    "Did you catch the game last night? It was an incredible comeback in the final quarter!",
+    "Lets do '42069' tonight it will be really fun what do you say ?"
+]
+
+# JIT Script the model for faster inference
+scripted_model = torch.jit.script(model)
+
+# Preallocate padding tensor to avoid repeated memory allocation
+MAX_LEN = max(FILTER_SIZES)
+padding_tensor = torch.zeros(1, MAX_LEN, dtype=torch.long).to(device)
+
+# Prediction function using JIT and inference optimizations
+def predict(text):
+    with torch.inference_mode():  # Use inference mode instead of no_grad
+        inputs = torch.tensor([text_pipeline(text)]).to(device)
+
+        # Perform padding if necessary
+        if inputs.size(1) < MAX_LEN:
+            inputs = torch.cat([inputs, padding_tensor[:, :MAX_LEN - inputs.size(1)]], dim=1)
+
+        # Pass inputs through the scripted model
+        outputs = scripted_model(inputs)
+
+        # Return predicted class
+        return torch.argmax(outputs, dim=1).item()
+
+def batch_predict(texts):
+    with torch.inference_mode():  # Use inference mode for better performance
+        # Tokenize and convert to tensors
+        inputs = [torch.tensor(text_pipeline(text)) for text in texts]
+
+        # Pad all sequences to the length of the longest one in the batch
+        max_len = max(len(seq) for seq in inputs)
+        padded_inputs = torch.stack([torch.cat([seq, torch.zeros(max_len - len(seq), dtype=torch.long)]) for seq in inputs]).to(device)
+
+        # Pass the batch through the scripted model
+        outputs = scripted_model(padded_inputs)
+
+        # Return predicted classes for each sentence
+        predictions = torch.argmax(outputs, dim=1).cpu().numpy()
+        return predictions
+
+# Test the sentences
+for i, sentence in enumerate(test_sentences, 1):
+    prediction = predict(sentence)
+    result = "Contains contact info" if prediction == 1 else "No contact info"
+    print(f"Sentence {i}: {result}")
+    print(f"Text: {sentence}\n")
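
predictor.py loads vocab.pth and the checkpoint at import time, TorchScripts the model, and then runs its built-in loop over test_sentences. A usage sketch for the two helpers that app.py imports, assuming both .pth files sit in the working directory and the saved vocabulary resolves out-of-vocabulary tokens (e.g. via a default index):

# Importing predictor also triggers the test loop at the bottom of the module.
from predictor import predict, batch_predict

print(predict("Text me at 555-867-5309"))  # 1 = flagged as contact info, 0 = not flagged

# batch_predict pads only to the longest sequence in the batch, so keep at least one
# input of max(FILTER_SIZES) = 10 tokens or more to satisfy the widest conv kernel.
print(batch_predict([
    "Reach me at jane@example.com whenever you get a chance tomorrow morning",
    "The recipe calls for two cups of flour and a pinch of salt.",
]))  # numpy array of 0/1 class indices, one per input
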
requirements.txt
ADDED
@@ -0,0 +1,70 @@
+annotated-types==0.7.0
+anyio==4.6.0
+blis==0.7.11
+catalogue==2.0.10
+certifi==2024.8.30
+charset-normalizer==3.3.2
+click==8.1.7
+cloudpathlib==0.19.0
+colorama==0.4.6
+confection==0.1.5
+cymem==2.0.8
+distro==1.9.0
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
+fastapi==0.115.0
+filelock==3.13.1
+fsspec==2024.2.0
+greenlet==3.1.1
+groq==0.11.0
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.2
+huggingface-hub==0.25.1
+idna==3.10
+Jinja2==3.1.3
+langcodes==3.4.0
+language_data==1.2.0
+marisa-trie==1.2.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mpmath==1.3.0
+murmurhash==1.0.10
+networkx==3.2.1
+numpy==1.26.3
+packaging==24.1
+pillow==10.2.0
+preshed==3.0.9
+pydantic==2.9.2
+pydantic_core==2.23.4
+Pygments==2.18.0
+PyYAML==6.0.2
+regex==2024.9.11
+requests==2.32.3
+rich==13.8.1
+safetensors==0.4.5
+setuptools==70.0.0
+shellingham==1.5.4
+smart-open==7.0.4
+sniffio==1.3.1
+spacy==3.7.6
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+SQLAlchemy==2.0.35
+srsly==2.4.8
+starlette==0.38.5
+sympy==1.12
+thinc==8.2.5
+tokenizers==0.19.1
+torch==2.2.0
+torchdata==0.7.1
+torchtext==0.16.2
+tqdm==4.66.5
+transformers==4.44.2
+typer==0.12.5
+typing_extensions==4.12.2
+urllib3==2.2.3
+uvicorn==0.30.6
+wasabi==1.1.3
+weasel==0.4.1
+wrapt==1.16.0
test.py
ADDED
@@ -0,0 +1,166 @@
+import asyncio
+import aiohttp
+import json
+from tqdm.asyncio import tqdm
+
+test_texts = [
+    "You can reach me at triple eight, then the square of 7, followed by 2^10",
+    "Drop a line to first_name [underscore] last_name at that company with a fruit logo dot com",
+    "Find me on the platform where professionals connect: J. Doe, Senior Developer at TechCorp",
+    "Message me on that app with the ghost icon: @ShadowWhisperer2023",
+    "Contact via carrier pigeon: coordinates 40.7128° N, 74.0060° W",
+    "Ping me on the federated network: @cooluser@mastodon.social",
+    "My contact is the reverse of moc.elpmaxe@eodnhoj",
+    "Reach out using morse: -... -.-- -....- . -- .- .. .-..",
+    "Find me on the platform with blue checkmarks: @RealJohnDoe (parody)",
+    "Send a message to username 'l33tc0d3r' on that platform for developers",
+    "You can locate me at the place where the streets have no name, in the city of angels",
+    "My digits are the Fibonacci sequence up to 21, concatenated",
+    "Contact: foxtrot oscar oscar at bravo alpha romeo dot charlie oscar mike",
+    "Beep me at the number you get when you multiply 555 by 1.5, then add 867-5309",
+    "I'm on that app where you share shortvideos: @Dancing2023",
+    "Reach out via electronic mail to 'lastnamefirstinitial' at that search engine company dot com",
+    "Call me at the number you get when you solve this equation: 2x + 5 = 13, then 555-MATH",
+    "My handle on that photo-sharing app is @SunsetSnapper_42",
+    "You can find me at the intersection of Binary Boulevard and Algorithm Avenue",
+    "Contact info: romeo oscar charlie kilo echo tango mike alpha november at zulu uniform lima uniform dot india oscar",
+    "Find me at 51.4778° N, and solve for x: x - 0.0019 = 0.1278° W",
+    "DM me at 📧👤💻🐦. Guess the platform 😉",
+    "If you add 2 to the area code of Los Angeles, you'll find the first 3 digits of my number",
+    "Ping me on the platform with 2 birds in its logo (and no, it's not a zoo!)",
+    "You can decode my email address: base64 for JmRvZGVAc2FtcGxlLmNvbQ==",
+    "You’ll find me on the platform that rhymes with 'squeaker' and involves chirps",
+    "If you reverse the letters of com.gmail@john and remove 'moc', you'll get my contact",
+    "For inquiries, try contacting me at 'first name.last name', but think of the sound fruit makes when it's dropped",
+    "Use morse to reach out: dash dot dot dash underscore underscore dash dot dot (first name at techcorp dot com)",
+    "Contact: solve 5x - 3 = 12 for x, that’s my lucky number for the area code, followed by the square root of 144 for the rest",
+    "Reach out on that site where professionals connect, my name rhymes with 'noe' and I’m a senior engineer at T-Corp",
+    "Shoot me a message on the photo-sharing app where sunsets get all the likes: handle is the same as my name in reverse",
+    "If you count the number of words in 'five stars' you'll get the first two digits of my handle on that coding platform",
+    "My email is hidden: find the cube root of 27, followed by the first name of a famous fruit and 'dot com'",
+    "If you multiply the number of days in March by 5, you'll get my contact digits",
+    "Contact me on the short-video app, my handle starts with a 'D' and ends with '23'!",
+    "Try to find me where algorithms reign and the search begins: think of a query that contains my last name and 'solutions'",
+    "Use binary to get my location: 01000101 01001110 01000111",
+    "You can ping me at 'bestcoder42' on the app where code flows like water",
+    "My digits: sum of first four Fibonacci numbers for the area code, and the next three for the phone number",
+    "Find me at 51.4778° N, and solve for x: x - 0.0019 = 0.1278° W",
+    "DM me at 📧👤💻🐦. Guess the platform 😉",
+    "If you add 2 to the area code of Los Angeles, you'll find the rest of my digits hiding nearby",
+    "Ping me at 'FirstnameLastname reversed' at that search company 🧐",
+    "The sum of the first two primes gives you the first part of my number, and 10 squared gets you the rest",
+    "Drop a message on the 'app named after a bird' to @JohnDoe2024 🌐",
+    "Morse this one: .... . .-.. .-.. --- @ secret-agent",
+    "Let’s connect: 3rd letter of my last name, then an underscore, then my birth year at fruit-company dot com 🍏",
+    "I'm on the platform for professionals but my handle is just a smiley face, hint hint 😉",
+    "Look up the coordinates of Big Ben and you might just find where I hang out 🕰️",
+    "Combine the atomic number of helium with my favorite fruit and you'll get my email",
+    "Find me at 'underscore emoji fan' at the app where people share funny short videos 🤳",
+    "Think of the number 404, then multiply it by 2, that’s the area code. The rest is easy!",
+    "I'm always up for a chat, just decode 01000011 01100001 01101100 01101100",
+    "I’m @SilentWhisper42 on the app where conversations vanish into thin air 👻",
+    "Track me down with this: Alpha-Bravo-Charlie at that company with flying machines ✈️",
+    "Ever heard of Fibonacci? My digits follow the pattern, up to 21",
+    "Search for the name of the singer of 'Rocket Man,' and you'll have part of my contact info 🧑🚀",
+    "Just send a message to 'TechWizard' at the email service that rhymes with whale-mail 🐋",
+    "My username on that site for devs is 'leet_hacker', but you’ll need to solve for x to figure out the rest!",
+    "My digits? Picture the number of planets in the solar system before 2006, then square it.",
+    "If you know the atomic numbers of oxygen and hydrogen, combine them and you have my first two digits.",
+    "Contact me where knowledge is power, at the symbol of illumination followed by 'dot org'.",
+    "I'm @user and you'll find me on the app where one tweets, but reverse that bird's sound first.",
+    "Think of the area code for New York, subtract one, and you’re almost there.",
+    "Reach out at 'wizard@', then imagine the home of the brave and the land of the free, followed by 'com'.",
+    "My handle is a palindrome on that platform where people share their lives one square at a time.",
+    "Find me at the intersection of 7 squared and the cube root of 8, you'll know the digits.",
+    "Ping me at 'Firstname reversed' dot 'company with a shopping cart logo'.",
+    "Send a message to the name of the president in 1993 at the platform where developers share code.",
+    "You can reach me at the number that shares its name with a famous Chicago bull, then add 10.",
+    "Reach out on the platform with the blue checkmarks, where I’m known as '56/8'.",
+    "The username is easy if you know your ASCII: 83 117 110 83 101 116 52 50.",
+    "For contact info, divide the year Armstrong walked on the moon by two and add the last prime number.",
+    "You can email me at the world's largest retailer with a name that rhymes with 'Hamazon'.",
+    "Catch me on the app where professionals hang out: it’s the opposite of 'InTouch'.",
+    "Look for me on the 'bird app' where my handle is my initials followed by the number of days in a leap year.",
+    "Ping me at 'Jupiter's largest moon' dot 'the company that sends rockets into space'.",
+    "Reach out at the sum of the angles in a triangle, followed by 'degrees at mail dot com'.",
+    "Message me where bytes are shared: I go by '@user_hexadecimal_4D2' on that site.",
+    "You can send it to 🌍 world_dot_explorer @ 'web page where you explore the world'.",
+    "数字 4 (Chinese), then 'underscore', then 'techie' at the search giant.",
+    "Write to me at the country with a maple leaf symbol, at their email provider.",
+    "Feel free to ping me at Жака at mail dot ru (that’s Russian for Jack).",
+    "If you take the French word for 'sun' and add 'shine', that’s where you can reach me.",
+    "My digits? They hide in plain sight: 42-4*8+18. Just subtract the stars.",
+    "Drop me a line at 'developer' followed by the country code for India, dot com.",
+    "Where to find me? It's obvious: 'who's' dot 'this', at the dot that ends with 'gov'.",
+    "A long story short: email me at 'fruit-company', the one that used to be a tree 🌳.",
+    "You'll get my email by figuring out: my first pet’s name, the city I grew up in, dot org.",
+    "My number? It's encrypted as SHA-256. Just decode it and you'll know!",
+    "Write to me in the ancient language of the Romans: 'maximus at something_prime dot com'.",
+    "Reach out to the winner of 2022's football world cup at 'world champions dot com'.",
+    "Find me at the place where the Eiffel Tower stands tall, at dot 'home of baguettes'.",
+    "Hit me up via snail mail: Just translate 'rabbit' into Italian and add 'at Italian mail'.",
+    "My digits form a prime sequence starting from 11, just keep counting!",
+    "For my number, follow the clues hidden in Da Vinci's most famous painting."
+]
+import time
+# url = "https://vidhitmakvana1-contact-sharing-recognizer-api.hf.space/batch_detect_contact"
+url = "http://localhost:8000/batch_detect_contact"
+
+async def process_batch(session, texts):
+    payload = {"texts": texts}
+    headers = {"Content-Type": "application/json"}
+
+    start_time = time.time()
+    async with session.post(url, data=json.dumps(payload), headers=headers) as response:
+        if response.status == 200:
+            results = await response.json()
+            end_time = time.time()
+            for result in results:
+                result['response_time'] = (end_time - start_time) / len(texts)
+            return results
+        else:
+            print(f"Error for batch")
+            print(f"Status code: {response.status}")
+            print(f"Response: {await response.text()}")
+            return None
+
+async def main():
+    # Inflate test_texts
+    inflated_texts = test_texts * 100  # Multiply the test set by 100
+
+    async with aiohttp.ClientSession() as session:
+        batch_size = 1000
+        batches = [inflated_texts[i:i + batch_size] for i in range(0, len(inflated_texts), batch_size)]
+
+        tasks = [process_batch(session, batch) for batch in batches]
+        all_results = await tqdm.gather(*tasks)
+
+    results = [item for sublist in all_results for item in sublist if sublist]
+
+    correct_predictions = 0
+    total_predictions = len(results)
+    total_response_time = 0
+
+    for result in results:
+        if result:
+            print(f"Text: {result['text']}")
+            print(f"Is Contact Info: {result['is_contact_info']}")
+            print(f"Method: {result['method']}")
+            print(f"Response Time: {result['response_time']:.4f} seconds")
+            print("---")
+
+            # Assuming all texts in test_texts are actually contact information
+            if result['is_contact_info']:
+                correct_predictions += 1
+
+            total_response_time += result['response_time']
+
+    accuracy = correct_predictions / total_predictions
+    average_response_time = total_response_time / total_predictions
+    print(f"Accuracy: {accuracy:.2f}")
+    print(f"Average Response Time: {average_response_time:.4f} seconds")
+
+if __name__ == "__main__":
+    while True:
+        asyncio.run(main())
vocab.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28edf2ae44d144c4566f0e5f95b856391166ac138ee578bac7fd9db151e1790a
+size 5184491