# bluenevus's picture
# Update app.py
# a92a91d verified
# raw
# history blame
# 6.34 kB
import gradio as gr
import requests
import json
import re
from github import Github, GithubException, Auth
import google.generativeai as genai
def extract_licenses(file_content, github_url, github_token, gemini_api_key):
    """Run the full analysis pipeline on the text of a dependency file.

    The dependency list is parsed from ``file_content``, license details are
    looked up for each entry, a list of probable package ecosystems is
    derived, and both are handed to Gemini for a written summary.

    ``github_url`` and ``github_token`` are accepted for signature
    compatibility with the caller but are not used here.

    Returns the text produced by ``enrich_with_gemini``.
    """
    parsed = parse_dependency_file(file_content)
    # Arguments are evaluated left to right, so the license lookup still runs
    # before the ecosystem guess.
    return enrich_with_gemini(
        fetch_license_info(parsed),
        determine_probable_packages(file_content, parsed),
        gemini_api_key,
    )
def parse_dependency_file(file_content):
    """Extract ``(package, version)`` pairs from dependency-file text.

    Each line is split at its first separator: a pip comparison operator
    (``==``, ``>=``, ``<=``, ``!=``, ``~=``, ``===``, ``<``, ``>``), a bare
    ``=``, ``@`` or ``:``. Lines without any separator are ignored, as are
    blank lines, ``#`` comment lines and pip option lines (``-r``, ``--...``).

    Surrounding quotes and trailing commas (JSON style) are removed, pip
    extras such as ``[security]`` are dropped from the package name, and only
    the first version constraint is kept. A leading ``@`` is treated as an
    npm scope rather than a separator. When no version text follows the
    separator the version is reported as ``"latest"``.

    Args:
        file_content: Full text of the dependency file.

    Returns:
        A list of ``(package, version)`` string tuples in file order.
    """
    # Multi-character operators come first so "==" or ">=" is consumed whole
    # instead of leaving an empty version or a stray ">" on the package name.
    separator = re.compile(r'===|[=<>!~]=|[=<>@:]')
    trim_chars = '"\' \t'

    dependencies = []
    for raw_line in file_content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith(('#', '-')):
            continue
        line = line.rstrip(',').lstrip('"\'')

        # Skip a leading "@" so scoped npm names ("@scope/name") stay intact.
        match = separator.search(line, 1 if line.startswith('@') else 0)
        if match is None:
            continue

        package = line[:match.start()].strip(trim_chars)
        package = package.split('[', 1)[0].strip()  # drop pip extras
        if not package:
            continue

        # Keep only the first constraint; discard markers and inline comments.
        remainder = line[match.end():]
        version = re.split(r'[,;#]', remainder, maxsplit=1)[0].strip(trim_chars)
        dependencies.append((package, version or "latest"))
    return dependencies
def fetch_license_info(dependencies):
    """Look up license and summary text for each dependency on PyPI.

    Args:
        dependencies: Iterable of ``(package, version)`` string pairs. An
            empty version or the placeholder ``"latest"`` queries the
            project-level endpoint instead of a specific release.

    Returns:
        One text block per dependency (package, version, license,
        description), joined with blank lines. A dependency whose lookup
        fails is still listed, with the license reported as ``Unknown``.
        An empty input yields an empty string.
    """
    reports = []
    for package, version in dependencies:
        if version and version != "latest":
            url = f"https://pypi.org/pypi/{package}/{version}/json"
        else:
            # No pinned release: ask for the project's current metadata.
            url = f"https://pypi.org/pypi/{package}/json"
        try:
            # The timeout keeps one unresponsive request from hanging the run.
            response = requests.get(url, timeout=10)
            info = response.json()['info']
            # `or` also covers fields that are present but null or empty.
            license_name = info.get('license') or 'Unknown'
            description = info.get('summary') or 'No description available'
        except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError):
            # Best effort: network failures, non-JSON bodies and "not found"
            # payloads (no 'info' key) all degrade to an Unknown entry.
            reports.append(f"Package: {package}\nVersion: {version}\nLicense: Unknown\nDescription: Unable to fetch information\n")
        else:
            reports.append(f"Package: {package}\nVersion: {version}\nLicense: {license_name}\nDescription: {description}\n")
    return "\n".join(reports)
def determine_probable_packages(file_content, dependencies):
    """Guess the package manager and well-known frameworks in use.

    The file text is searched, case-insensitively, for the names of common
    dependency files; only the first marker that matches (in the order
    package.json, gemfile, requirements.txt) contributes a package manager.
    Each dependency whose lower-cased name is a known framework then adds
    that framework's label.

    Returns the labels joined by newlines, or an empty string if none apply.
    """
    lowered_text = file_content.lower()
    manager_markers = (
        ("package.json", "npm (Node Package Manager)"),
        ("gemfile", "Bundler (Ruby)"),
        ("requirements.txt", "pip (Python Package Installer)"),
    )
    well_known = {
        "react": "React (JavaScript library)",
        "django": "Django (Python web framework)",
        "rails": "Ruby on Rails (Web application framework)",
    }

    # First matching marker wins, mirroring an if/elif chain.
    manager = next(
        (label for marker, label in manager_markers if marker in lowered_text),
        None,
    )
    guesses = [] if manager is None else [manager]
    guesses.extend(
        well_known[name.lower()]
        for name, _version in dependencies
        if name.lower() in well_known
    )
    return "\n".join(guesses)
def enrich_with_gemini(licenses, probable_packages, api_key):
    """Ask Gemini for a written analysis of the collected license data.

    Configures the ``genai`` client with ``api_key``, builds a prompt that
    embeds ``licenses`` and ``probable_packages``, and sends it to the model
    named below.

    Returns the ``text`` attribute of the model's response.
    """
    genai.configure(api_key=api_key)
    gemini = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
    # The prompt body is kept flush-left so the text sent to the model
    # carries no leading indentation.
    request_text = f"""
Analyze the following open-source license information and probable packages:
License Information:
{licenses}
Probable Packages:
{probable_packages}
Please provide a summary of the project based on these dependencies, including:
1. The likely type of project (e.g., web application, data science, etc.)
2. Any potential license conflicts or considerations
3. Suggestions for best practices in open-source license management for this project
"""
    return gemini.generate_content(request_text).text
def fetch_github_info(github_url, github_token):
    """Download the text of a single file referenced by a GitHub URL.

    The URL must look like
    ``https://github.com/<owner>/<repo>/blob/<branch>/<path/to/file>``.
    The branch segment is passed to GitHub as the ref and is not part of the
    file path. Branch names containing ``/`` cannot be told apart from path
    segments and are not supported.

    Args:
        github_url: URL of the file on github.com.
        github_token: Personal access token used to authenticate.

    Returns:
        The file's UTF-8 decoded content on success. On failure a message
        string is returned instead of raising; it begins with ``"Error:"``,
        ``"Error accessing GitHub:"`` or ``"Unexpected error:"``.
    """
    # Drop any fragment / query string, then split into
    # ['https:', '', 'github.com', owner, repo, 'blob', branch, *path].
    cleaned_url = github_url.strip().split('#', 1)[0].split('?', 1)[0].rstrip('/')
    segments = cleaned_url.split('/')
    if len(segments) < 8 or not all(segments[3:]):
        return ("Error: Invalid GitHub URL. Please use the format: "
                "https://github.com/username/repository/blob/branch/path/to/file")
    owner, repo_slug, _marker, branch, *path_parts = segments[3:]
    repo_name = f"{owner}/{repo_slug}"
    file_path = '/'.join(path_parts)

    try:
        # Create an authentication object
        auth = Auth.Token(github_token)
        # Create a Github instance with authentication
        g = Github(auth=auth)
        print(f"Attempting to access file: {file_path} in repository: {repo_name}")
        # Get the repository
        repository = g.get_repo(repo_name)
        # Get the file contents from the branch named in the URL
        contents = repository.get_contents(file_path, ref=branch)
        if isinstance(contents, list):
            # A list is returned when the path is a directory.
            return f"Error: {file_path} is a directory, not a file. Please provide the URL of a single file."
        file_content = contents.decoded_content.decode('utf-8')
        print(f"Successfully retrieved {file_path}")
        return file_content
    except GithubException as e:
        if e.status == 404:
            return f"Error: File or repository not found. Please check the URL and ensure you have the correct access permissions. Details: {str(e)}"
        else:
            return f"Error accessing GitHub: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"
def process_input(file, github_url, github_token, gemini_api_key):
    """Validate the form inputs, load the dependency file and analyse it.

    Exactly one source must be supplied: an uploaded file, or a GitHub file
    URL together with an access token.

    Args:
        file: The upload, or ``None``. May be raw bytes, a filesystem path,
            or an object whose ``name`` attribute is a filesystem path.
        github_url: URL of a file on github.com (may be empty).
        github_token: GitHub personal access token (may be empty).
        gemini_api_key: API key forwarded to the Gemini analysis step.

    Returns:
        The analysis text, or a human-readable message starting with
        ``"Error"`` / ``"Unexpected error"`` when something goes wrong.
    """
    if file is not None and github_url:
        return "Error: Please either upload a file OR provide a GitHub URL, not both."

    if file is not None:
        try:
            if isinstance(file, (bytes, bytearray)):
                file_content = bytes(file).decode('utf-8')
            else:
                # Path string, or a temp-file style wrapper exposing `.name`.
                with open(getattr(file, 'name', file), encoding='utf-8') as handle:
                    file_content = handle.read()
        except (OSError, UnicodeDecodeError, TypeError) as e:
            return f"Error: Could not read the uploaded file as UTF-8 text: {str(e)}"
    elif github_url and github_token:
        if not github_url.startswith("https://github.com/"):
            return "Error: Invalid GitHub URL. Please use the format: https://github.com/username/repository/blob/branch/path/to/file"
        if not github_token.strip():
            return "Error: GitHub Personal Access Token is empty. Please provide a valid token."
        file_content = fetch_github_info(github_url, github_token)
        # fetch_github_info reports failures in-band with any of these
        # prefixes; none of them may be analysed as if it were file content.
        if file_content.startswith(("Error:", "Error accessing GitHub:", "Unexpected error:")):
            return file_content
    else:
        return "Error: Please either upload a file OR provide both GitHub URL and access token."

    try:
        return extract_licenses(file_content, github_url, github_token, gemini_api_key)
    except Exception as e:
        return f"Error processing the file: {str(e)}"
# Gradio front end. The four input components are passed positionally to
# process_input, so their order must match its parameter order:
# file, github_url, github_token, gemini_api_key.
iface = gr.Interface(
    title="Open Source License Extractor",
    description="Upload a dependency file OR provide a GitHub file URL to extract and analyze open-source license information.",
    fn=process_input,
    inputs=[
        gr.File(label="Upload dependency file (e.g., requirements.txt, package.json, Gemfile)"),
        gr.Textbox(label="GitHub File URL (optional)"),
        gr.Textbox(label="GitHub Personal Access Token (required if using GitHub URL)", type="password"),
        gr.Textbox(label="Gemini API Key", type="password"),
    ],
    outputs=gr.Textbox(label="License Information and Analysis"),
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()