import json
import os
import traceback
import uuid

import pandas as pd

from apollo_apis import get_mixed_people, get_person_contact
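
# NOTE: `apollo_apis` is a local helper module; the signatures below are
# inferred from how the helpers are used in this file, not from published
# docs:
#   get_mixed_people(params, api_key)  -> Apollo mixed_people search response
#       dict (with "pagination" and "people" keys), or None on failure.
#   get_person_contact(records, api_key) -> (emails, phone_numbers) for a
#       batch of {"linkedin_url": ...} records.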


def convert_json_to_files(api_key, payload):
    """Run an Apollo people search, enrich contacts, and export them to XLSX."""
    res = None  # kept in scope so the except block can log the last response
    try:
        # Parse the search filters from the input JSON payload.
        data = json.loads(payload)

        # First request: only used to discover how many result pages exist.
        res = get_mixed_people(data, api_key)
        print("Initial response:", res)
        ending_page_index = res["pagination"]["total_pages"]
        if ending_page_index > 5:
            ending_page_index = 5  # free Apollo accounts can only scrape 5 pages
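
        # Assumed pagination shape, inferred from the fields read above:
        #   res["pagination"] == {"page": 1, "per_page": 25, "total_pages": N, ...}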

        # One list per output column; the loops below fill them page by page.
        name = []
        first_name = []
        email = []
        company = []
        website_link = []
        job_title = []
        city = []
        country = []
        linkedin_profile = []
        phone_number = []
        # Planned but not yet populated: company_size, industry,
        # seo_description, seo_keywords, technology.
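
        # df_data holds *references* to the column lists above, so every
        # append in the loops below is reflected in the final DataFrame.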
        df_data = {
            "Name": name,
            "First Name": first_name,
            "Email": email,
            "Company": company,
            "Website Link": website_link,
            "Job Title": job_title,
            "City": city,
            "Country": country,
            "Linkedin Profile": linkedin_profile,
            "Phone Number": phone_number,
        }

        # Walk every available page and collect the exported fields.
        for page in range(1, ending_page_index + 1):
            data["page"] = page
            res = get_mixed_people(data, api_key)
            if res is None:
                continue
            people = res["people"]
            print(f"Page {page}: {len(people)} people")
            for person in people:
                org = person.get("organization") or {}  # organization can be absent
                name.append(person.get("name"))
                first_name.append(person.get("first_name"))
                company.append(org.get("name"))
                website_link.append(org.get("website_url"))
                job_title.append(person.get("title"))
                city.append(person.get("city"))
                country.append(person.get("country"))
                linkedin_profile.append(person.get("linkedin_url"))
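
        # Contact enrichment runs in batches of 10 LinkedIn URLs. The batch
        # size mirrors the original code; it presumably matches the limit of
        # Apollo's bulk people-match endpoint, so check your plan's limits.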
        chunks = [linkedin_profile[i:i + 10] for i in range(0, len(linkedin_profile), 10)]
        for chunk in chunks:
            records = [{"linkedin_url": url} for url in chunk]
            person_email, person_number = get_person_contact(records, api_key)
            email.extend(person_email)
            phone_number.extend(person_number)

        # Snapshot the collected data as JSON before building the DataFrame;
        # this makes failed exports easy to inspect.
        with open("saving_df.json", "w") as json_file:
            json.dump(df_data, json_file, indent=4)
        df = pd.DataFrame(data=df_data)

        # Save as XLSX under a unique name so repeated runs never collide.
        os.makedirs("output_files", exist_ok=True)
        xlsx_file = f"output_files/leads_{uuid.uuid4()}.xlsx"
        df.to_excel(xlsx_file, index=False)
        return df, xlsx_file
    except Exception as e:
        print("Last response:", res)
        print(f"Exception | convert_json_to_files | {e}")
        traceback.print_exc()
        return pd.DataFrame(), None
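

# Minimal usage sketch. Assumptions (not from the original code): the API key
# lives in the APOLLO_API_KEY environment variable, and the payload fields
# follow Apollo's mixed_people search parameters as consumed by the
# `apollo_apis` helpers.
if __name__ == "__main__":
    sample_payload = json.dumps({
        "person_titles": ["Marketing Manager"],
        "person_locations": ["United States"],
        "page": 1,
        "per_page": 25,
    })
    df, path = convert_json_to_files(os.environ["APOLLO_API_KEY"], sample_payload)
    print(f"Wrote {len(df)} leads to {path}")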