Hisab Cloud committed on
Commit
08b7f89
1 Parent(s): b35b011

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +44 -0
  2. github_utils.py +47 -0
  3. markdown_utils.py +49 -0
  4. requirements.txt +6 -0
  5. translation_utils.py +111 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from github_utils import clone_repo, push_translated_files, clean_local_repo
3
+ from markdown_utils import parse_markdown_files, extract_translatable_text, save_translated_files
4
+ from translation_utils import translate_content
5
+
6
def translate(repo_url, target_language):
    """
    Translates every Markdown file of a GitHub repository and pushes the result.

    The pipeline clones the repository, reads its Markdown files, translates
    each file's content, writes the translations into the clone, commits and
    pushes them, and finally removes the local clone.

    :param repo_url: URL of the GitHub repository to translate
    :param target_language: Target language passed on to the translator
    :return: A success message, or the text of the exception that stopped
        the pipeline (shown to the user in the Gradio output box)
    """
    import os

    clone_dir = 'cloned_repo'
    try:
        try:
            # 1. Fetch the Markdown files from GitHub.
            files = clone_repo(repo_url, clone_dir)

            # 2. Load the content of each file.
            parsed_files = parse_markdown_files(files)

            # 3. Translate with the language model. translate_content expects
            #    the raw Markdown text and extracts the segments itself.
            translated_files = [
                {
                    'filename': parsed['filename'],
                    'content': translate_content(parsed['content'], target_language),
                }
                for parsed in parsed_files
            ]

            # 4. Write the translated text back into the clone.
            save_translated_files(translated_files)

            # 5. Push the translated files to GitHub. The paths returned by
            #    clone_repo start with clone_dir, while push_translated_files
            #    joins clone_dir onto each filename, so hand it paths relative
            #    to the repository root.
            repo_files = [
                {
                    'filename': os.path.relpath(entry['filename'], clone_dir),
                    'content': entry['content'],
                }
                for entry in translated_files
            ]
            push_translated_files(repo_url, repo_files, clone_dir)
        finally:
            # 6. Remove the local clone whether or not the run succeeded.
            clean_local_repo(clone_dir)

        return 'Translation completed and files pushed to GitHub'
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing Gradio.
        return str(e)
33
+
34
+ # إنشاء واجهة Gradio
35
# Two free-text inputs (repository URL and target language) feed translate();
# its returned status message is rendered as plain text.
iface = gr.Interface(
    title="Markdown Translator",
    description="Translate Markdown files from a GitHub repository to a target language.",
    fn=translate,
    inputs=[
        gr.Textbox(label="GitHub Repo URL"),
        gr.Textbox(label="Target Language"),
    ],
    outputs="text",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
github_utils.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from git import Repo
4
+
5
def clone_repo(repo_url, clone_dir='cloned_repo'):
    """
    Clones a GitHub repository to the local machine.

    Any existing directory at ``clone_dir`` is deleted first so the clone
    always starts from a clean state.

    :param repo_url: URL of the GitHub repository
    :param clone_dir: Directory where the repository will be cloned
    :return: List of markdown file paths, each prefixed with ``clone_dir``
    """
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)
    Repo.clone_from(repo_url, clone_dir)

    markdown_files = []
    for root, dirs, files in os.walk(clone_dir):
        # Prune Git's metadata directory in place so os.walk never descends
        # into it: files there (e.g. a loose ref for a branch named "x.md")
        # are not documents and must never be rewritten.
        if '.git' in dirs:
            dirs.remove('.git')
        markdown_files.extend(
            os.path.join(root, name) for name in files if name.endswith('.md')
        )
    return markdown_files
22
+
23
def _repo_relative_path(filename, clone_dir):
    """Return ``filename`` relative to ``clone_dir`` when it lies inside it, else unchanged."""
    clone_root = os.path.abspath(clone_dir)
    candidate = os.path.abspath(filename)
    try:
        inside_clone = os.path.commonpath([clone_root, candidate]) == clone_root
    except ValueError:
        # Raised on Windows when the two paths are on different drives.
        inside_clone = False
    if inside_clone:
        return os.path.relpath(candidate, clone_root)
    return filename


def push_translated_files(repo_url, translated_files, clone_dir='cloned_repo'):
    """
    Pushes translated files back to the GitHub repository.

    Each file is written into the clone, staged, committed with the message
    'Add translated files' and pushed to the ``origin`` remote.

    :param repo_url: URL of the GitHub repository (unused; the push goes to
        the clone's ``origin`` remote)
    :param translated_files: List of dicts with 'filename' and 'content'.
        A filename may be relative to the repository root or already
        prefixed with ``clone_dir`` (as returned by clone_repo); both forms
        resolve to the same file inside the clone.
    :param clone_dir: Directory where the repository is cloned
    :raises git.exc.GitCommandError: if the remote rejects the push
    """
    repo = Repo(clone_dir)
    origin = repo.remote(name='origin')

    relative_paths = []
    for file in translated_files:
        # Normalise first: joining clone_dir onto a path that already starts
        # with clone_dir would point at a non-existent nested directory.
        relative_path = _repo_relative_path(file['filename'], clone_dir)
        target = os.path.join(clone_dir, relative_path)
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            f.write(file['content'])
        relative_paths.append(relative_path)

    if not relative_paths:
        # Nothing was translated, so avoid creating and pushing an empty commit.
        return

    # Relative paths given to the index are resolved against the working tree.
    repo.index.add(relative_paths)
    repo.index.commit('Add translated files')
    # push() reports failures in its result instead of raising; surface them
    # so a rejected push is not mistaken for success.
    origin.push().raise_if_error()
39
+
40
def clean_local_repo(clone_dir='cloned_repo'):
    """
    Cleans up the local cloned repository.

    Does nothing when ``clone_dir`` does not exist. Read-only entries (Git
    stores its object files read-only, which blocks deletion on Windows) are
    made writable and the failed removal is retried.

    :param clone_dir: Directory where the repository is cloned
    """
    import stat
    import sys

    def _make_writable_and_retry(func, path, _exc):
        # Clear the read-only bit, then repeat the operation that failed.
        os.chmod(path, stat.S_IWRITE)
        func(path)

    if not os.path.exists(clone_dir):
        return

    # Python 3.12 introduced ``onexc`` and deprecated ``onerror``; both call
    # the handler with three positional arguments.
    if sys.version_info >= (3, 12):
        shutil.rmtree(clone_dir, onexc=_make_writable_and_retry)
    else:
        shutil.rmtree(clone_dir, onerror=_make_writable_and_retry)
markdown_utils.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+
4
def parse_markdown_files(file_paths):
    """
    Reads each markdown file and pairs its path with its text.

    :param file_paths: List of markdown file paths
    :return: List of dictionaries with the keys 'filename' (the path as
        given) and 'content' (the file's text decoded as UTF-8), in the same
        order as ``file_paths``
    """
    def _read_text(path):
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.read()

    return [
        {'filename': path, 'content': _read_text(path)}
        for path in file_paths
    ]
17
+
18
def extract_translatable_text(content):
    """
    Extracts translatable text from markdown content.

    Fenced code blocks and HTML tags are removed. Markdown links and images
    are reduced to their visible text, so the URL is dropped while the words
    the reader sees are kept for translation. The remaining text is split on
    blank lines.

    :param content: Markdown content
    :return: List of translatable text segments (stripped, non-empty,
        in document order)
    """
    code_block_pattern = re.compile(r'```.*?```', re.DOTALL)
    html_block_pattern = re.compile(r'<.*?>', re.DOTALL)
    # Optional leading "!" also covers images; group 1 is the link/alt text.
    link_pattern = re.compile(r'!?\[(.*?)\]\(.*?\)')

    content = code_block_pattern.sub('', content)
    content = html_block_pattern.sub('', content)

    # Repeat until stable so nested constructs such as a linked badge,
    # "[![alt](img)](url)", are fully unwrapped. Every substitution removes
    # at least the bracket characters, so the loop always terminates.
    while True:
        unwrapped = link_pattern.sub(r'\1', content)
        if unwrapped == content:
            break
        content = unwrapped

    # Paragraphs and headers are separated by blank lines.
    paragraphs = re.split(r'\n\s*\n', content)
    return [para.strip() for para in paragraphs if para.strip()]
37
+
38
def save_translated_files(translated_files):
    """
    Saves translated files to the local machine.

    Each file is written as UTF-8 to its 'filename' path, replacing any
    existing file. Missing parent directories are created.

    :param translated_files: List of dicts with 'filename' and 'content'
    """
    for file in translated_files:
        directory = os.path.dirname(file['filename'])
        # A bare filename has no directory part, and os.makedirs('') raises;
        # exist_ok also removes the check-then-create race.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(file['filename'], 'w', encoding='utf-8') as f:
            f.write(file['content'])
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Flask==3.0.3
2
+ gitpython==3.1.36
3
+ requests==2.31.0
4
+ flask-cors==4.0.0
5
+ gradio==3.11.0
6
+ groq==0.9.0
translation_utils.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ # from flask_cors import CORS
3
+
4
+ from groq import Groq
5
+ # CORS()
6
+ # GROQ_API_URL = "https://api.groq.com/translate" # استبدل هذا بعنوان API الخاص بـ Groq
7
import os

# SECURITY: the API key must never be committed to source control. It is read
# from the GROQ_API_KEY environment variable instead; any key that was
# previously committed here should be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Shared Groq client used by the translation helpers in this module.
client = Groq(api_key=GROQ_API_KEY)
10
+
11
def translate_texts_groq(texts, target_language="ar"):
    """
    Translates a list of texts to the target language using Groq.

    One chat completion is requested per text, so the result always has the
    same length and order as ``texts`` and can be zipped against it.

    :param texts: List of texts to translate
    :param target_language: Target language code or name
    :return: List of translated texts, one per input text
    """
    translated_texts = []
    for text in texts:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a translation engine. Reply with the "
                        "translation only, preserving any Markdown formatting."
                    ),
                },
                {
                    "role": "user",
                    "content": (
                        "Translate the following text into the language "
                        f"identified by '{target_language}':\n\n{text}"
                    ),
                },
            ],
            model="llama3-8b-8192",
        )
        translated_texts.append(chat_completion.choices[0].message.content)

    return translated_texts
53
+
54
def translate_content(content, target_language):
    """
    Translates the content of a markdown file, preserving non-translatable parts.

    The translatable segments are extracted, translated, and substituted
    back into the original text at the position where each was found.
    Everything between the segments is copied through unchanged.

    :param content: Markdown content
    :param target_language: Target language code
    :return: Translated content
    """
    # Imported here because this module does not import markdown_utils at
    # the top level, which made the bare call below a NameError.
    from markdown_utils import extract_translatable_text

    translatable_texts = extract_translatable_text(content)
    translated_texts = translate_texts_groq(translatable_texts, target_language)

    # Segments come back in document order, so walk the text with a cursor.
    # This replaces exactly one occurrence per segment and never rewrites
    # text inside an earlier translation.
    pieces = []
    cursor = 0
    for original, translated in zip(translatable_texts, translated_texts):
        position = content.find(original, cursor)
        if position == -1 or not translated:
            # Extraction altered this segment (e.g. stripped markup), or the
            # model returned nothing: keep the source text untouched.
            continue
        pieces.append(content[cursor:position])
        pieces.append(translated)
        cursor = position + len(original)
    pieces.append(content[cursor:])
    return ''.join(pieces)
69
+
70
+
71
+
72
+
73
+
74
+ # from transformers import MarianMTModel, MarianTokenizer
75
+
76
+ # # Initialize the model and tokenizer
77
+ # model_name = 'Helsinki-NLP/opus-mt-en-<target_language>' # Replace <target_language> with the target language code
78
+ # model = MarianMTModel.from_pretrained(model_name)
79
+ # tokenizer = MarianTokenizer.from_pretrained(model_name)
80
+
81
+ # def translate_texts(texts, target_language):
82
+ # """
83
+ # Translates a list of texts to the target language.
84
+
85
+ # :param texts: List of texts to translate
86
+ # :param target_language: Target language code
87
+ # :return: List of translated texts
88
+ # """
89
+ # translated_texts = []
90
+ # for text in texts:
91
+ # inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
92
+ # translated = model.generate(**inputs)
93
+ # translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
94
+ # translated_texts.append(translated_text)
95
+ # return translated_texts
96
+
97
+ # def translate_content(content, target_language):
98
+ # """
99
+ # Translates the content of a markdown file, preserving non-translatable parts.
100
+
101
+ # :param content: Markdown content
102
+ # :param target_language: Target language code
103
+ # :return: Translated content
104
+ # """
105
+ # translatable_texts = extract_translatable_text(content)
106
+ # translated_texts = translate_texts(translatable_texts, target_language)
107
+
108
+ # # Reconstruct the content with translated texts
109
+ # for original, translated in zip(translatable_texts, translated_texts):
110
+ # content = content.replace(original, translated)
111
+ # return content