Spaces:

KIMOSSINO
/

hashtagtik

Sleeping

App Files Community

KIMOSSINO committed on Dec 10, 2024

Commit

f33309d

verified ·

1 Parent(s): 4dbac94

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -102

app.py CHANGED Viewed

@@ -1,122 +1,116 @@
 import gradio as gr
 from bs4 import BeautifulSoup
-import requests
-import re
-from urllib.parse import urlparse
def extract_youtube_id(url):
    """Return the 11-character YouTube video id found in *url*, or None.

    The id is the first run of 11 id characters that directly follows
    either ``v=`` or a ``/``.
    """
    found = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', url)
    if found is None:
        return None
    return found.group(1)
def extract_tiktok_id(url):
    """Return the digits that follow ``video/`` in a TikTok URL, or None."""
    found = re.search(r'video/(\d+)', url)
    if found:
        return found.group(1)
    return None
def get_hashtags_from_text(text):
    """Return every ``#word`` token in *text*, in order of appearance."""
    return [hit.group() for hit in re.finditer(r'#\w+', text)]
def extract_from_url(url):
    """Fetch a YouTube or TikTok page and pull out title, description and hashtags.

    Only hosts containing ``youtube.com``, ``youtu.be`` or ``tiktok.com``
    are handled; any other host yields the "no data found" message.

    Args:
        url: Link to the video page.

    Returns:
        A 3-tuple of strings ``(title, description, hashtags)`` with the
        hashtags joined by newlines. On failure the first element carries
        a user-facing message and the other two are empty strings.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Without a timeout requests may wait forever on an unresponsive host.
    request_timeout = 10
    try:
        # urlparse raises ValueError on malformed input (e.g. an unclosed
        # IPv6 bracket), so it lives inside the try and is reported like
        # every other failure instead of escaping to the caller.
        domain = urlparse(url).netloc

        # YouTube pages
        if 'youtube.com' in domain or 'youtu.be' in domain:
            video_id = extract_youtube_id(url)
            if not video_id:
                return "رابط يوتيوب غير صالح", "", ""
            response = requests.get(url, headers=headers, timeout=request_timeout)
            soup = BeautifulSoup(response.text, 'html.parser')
            # Title
            title_element = soup.find('h1', {'id': 'title'})
            title = title_element.text.strip() if title_element else "العنوان غير متوفر"
            # Description
            description_element = soup.find('div', {'id': 'description'})
            description = description_element.text.strip() if description_element else "الوصف غير متوفر"
            # Hashtags are taken from the description text
            hashtags = get_hashtags_from_text(description)
            return title, description, "\n".join(hashtags)
        # TikTok pages
        elif 'tiktok.com' in domain:
            video_id = extract_tiktok_id(url)
            if not video_id:
                return "رابط تيكتوك غير صالح", "", ""
            response = requests.get(url, headers=headers, timeout=request_timeout)
            soup = BeautifulSoup(response.text, 'html.parser')
            # Title
            title_element = soup.find('h1')
            title = title_element.text.strip() if title_element else "العنوان غير متوفر"
            # The description is reported as the same text as the <h1> title
            description = title
            # Hashtags are the anchors whose class contains "hashtag"
            hashtags_elements = soup.find_all('a', {'class': re.compile(r'.*hashtag.*')})
            hashtags = [tag.text.strip() for tag in hashtags_elements]
            return title, description, "\n".join(hashtags)
    except Exception as e:
        # Outermost handler for the UI callback: show the failure as text.
        return f"حدث خطأ: {str(e)}", "", ""
    return "لم يتم العثور على بيانات", "", ""
 # Build and return the Gradio Blocks UI for the link analyzer.
 # Layout, top to bottom: a heading, a URL textbox, an analyze button, and
 # three read-only textboxes (title, description, hashtags), each in its
 # own row. Clicking the button calls extract_from_url with the URL and
 # routes its three return values to the three textboxes.
 def gradio_interface():
     with gr.Blocks() as demo:
-        gr.Markdown("## 📱 محلل روابط وسائل التواصل الاجتماعي")
-        with gr.Row():
-            url_input = gr.Textbox(
-                label="🔗 أدخل رابط يوتيوب/تيكتوك",
-                placeholder="https://.../video/..."
-            )
         with gr.Row():
-            analyze_btn = gr.Button("تحليل الرابط", variant="primary")
         with gr.Row():
-            title_output = gr.Textbox(
-                label="📝 العنوان",
-                lines=2,
-                interactive=False
-            )
         with gr.Row():
-            description_output = gr.Textbox(
-                label="📄 الوصف",
-                lines=5,
-                interactive=False
             )
-        with gr.Row():
             hashtags_output = gr.Textbox(
-                label="🏷️ الهاشتاغات",
-                lines=5,
-                interactive=False
             )
         # Output order must match the order of extract_from_url's return tuple.
         analyze_btn.click(
-            fn=extract_from_url,
-            inputs=[url_input],
-            outputs=[title_output, description_output, hashtags_output]
         )
     return demo
 if __name__ == "__main__":
     # Build the Blocks UI and launch it; this runs only when the file is executed as a script.
     demo = gradio_interface()
     demo.launch()

 import gradio as gr
+from collections import Counter
 from bs4 import BeautifulSoup
def extract_titles_and_hashtags_and_views(file):
    """Extract video titles, hashtag frequencies and view counts from saved HTML.

    Args:
        file: The uploaded file. May be a file-like object exposing
            ``read()``, a filesystem path given as ``str``, or an object
            whose ``name`` attribute holds the path.
            NOTE(review): newer Gradio releases appear to pass the upload
            as a plain path string; confirm against the installed version.

    Returns:
        A 3-tuple of strings ``(titles, hashtags, views)``, matching the
        three output textboxes of the click handler. Titles are numbered
        by container position, hashtags are listed as ``tag: count``
        sorted by descending count then name, and views are listed as
        ``title: view``. On failure the first element carries the
        user-facing error message and the other two are empty strings.
    """
    # Read the file content. This function is the outermost handler for the
    # button click, so any read failure is reported as text, not raised.
    try:
        if hasattr(file, 'read'):
            content = file.read()
        else:
            # A plain string is already the path; otherwise the path is
            # taken from the object's ``name`` attribute.
            path = file if isinstance(file, str) else file.name
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
    except Exception as e:
        return f"خطأ أثناء قراءة الملف: {e}", "", ""

    # Parse the HTML with BeautifulSoup.
    try:
        soup = BeautifulSoup(content, 'html.parser')
    except Exception as e:
        return f"خطأ في تحليل محتوى HTML: {e}", "", ""

    # Locate the description containers; without them there is nothing to do.
    desc_containers = soup.find_all('div', class_="css-vi46v1-DivDesContainer")
    if not desc_containers:
        return "لم يتم العثور على أي بيانات مطابقة.", "", ""

    titles = []
    hashtags_counter = Counter()
    views_text = []

    for container in desc_containers:
        # Title: prefer h2, then h1, then div, then any element with class "title".
        title_tag = (
            container.find('h2', class_='title')
            or container.find('h1', class_='title')
            or container.find('div', class_='title')
            or container.find(class_='title')
        )
        if title_tag:
            title = title_tag.get_text(strip=True)
        else:
            # Fall back to the container's aria-label, or the "untitled" marker.
            title = container.get('aria-label', 'بدون عنوان').strip()

        # Hashtags: the text of every link that starts with '#'.
        link_texts = (tag.get_text(strip=True) for tag in container.find_all('a'))
        hashtags_counter.update(text for text in link_texts if text.startswith('#'))

        # View count, or the "not available" marker when the element is missing.
        view_tag = container.find('strong', class_="css-ws4x78-StrongVideoCount etrd4pu10")
        view = view_tag.get_text(strip=True) if view_tag else "غير متوفر"

        titles.append(title)
        views_text.append(f"{title}: {view}")

    # Assemble the final texts. Untitled entries are left out of the title
    # list, but the numbering still reflects each container's position.
    titles_text = "\n".join(
        f"{i + 1}. {title}" for i, title in enumerate(titles) if title != 'بدون عنوان'
    )
    hashtags_text = "\n".join(
        f"{hashtag}: {count}"
        for hashtag, count in sorted(hashtags_counter.items(), key=lambda x: (-x[1], x[0]))
    )
    views_summary_text = "\n".join(views_text)

    return (
        titles_text or "لا توجد عناوين مستخرجة.",
        hashtags_text or "لا توجد هاشتاغات مستخرجة.",
        views_summary_text or "لا توجد بيانات مشاهدة.",
    )
+# Build the Gradio interface
 # Build and return the Gradio Blocks UI for the saved-HTML analyzer.
 # Layout, top to bottom: a heading, a file upload restricted to .txt, an
 # analyze button, and one row holding three read-only textboxes (titles,
 # hashtags, views). Clicking the button calls
 # extract_titles_and_hashtags_and_views with the uploaded file and routes
 # its return values to the three textboxes.
 def gradio_interface():
     with gr.Blocks() as demo:
+        gr.Markdown("## 📝 محلل النصوص المتقدم")
         with gr.Row():
+            file_input = gr.File(label="📂 رفع ملف TXT", file_types=[".txt"])
         with gr.Row():
+            analyze_btn = gr.Button("تحليل البيانات", variant="primary")
         with gr.Row():
+            titles_output = gr.Textbox(
+                label="📜 العناوين المستخرجة",
+                lines=10,
+                interactive=False,
+                placeholder="ستظهر العناوين هنا"
             )
             hashtags_output = gr.Textbox(
+                label="🏷️ الهاشتاغات المستخرجة (مع التكرار)",
+                lines=10,
+                interactive=False,
+                placeholder="ستظهر الهاشتاغات هنا"
+            )
+            views_output = gr.Textbox(
+                label="👀 نسبة المشاهدة",
+                lines=10,
+                interactive=False,
+                placeholder="ستظهر نسب المشاهدة هنا"
             )
         # Three outputs are wired here, so the handler must return exactly three values.
         analyze_btn.click(
+            fn=extract_titles_and_hashtags_and_views,
+            inputs=[file_input],
+            outputs=[titles_output, hashtags_output, views_output],
         )
     return demo
+# Run the application
 if __name__ == "__main__":
     # Build the Blocks UI and launch it; this runs only when the file is executed as a script.
     demo = gradio_interface()
     demo.launch()