import json
import os
import traceback
import uuid

import pandas as pd

from apollo_apis import get_mixed_people, get_person_contact
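
# NOTE: apollo_apis is a local module not shown here. The signatures below are
# assumptions inferred from how the helpers are used in this file:
#   get_mixed_people(payload: dict, api_key: str) -> dict | None
#       expected to return Apollo's people-search response, including
#       "pagination" ({"page", "total_pages", ...}) and "people" keys.
#   get_person_contact(records: list[dict], api_key: str) -> tuple[list, list]
#       expected to return (emails, phone_numbers), one entry per input record.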
def convert_json_to_files(api_key, payload):
    """Run an Apollo people search, enrich contacts in batches, and save the
    results as a JSON dump and an XLSX file. Returns (DataFrame, xlsx_path)."""
    res = None  # kept in scope so the except block can log the last response
    try:
        # Parse the input JSON payload into the search-filter dict
        data = json.loads(payload)
        res = get_mixed_people(data, api_key)
        print("RES >>>>>>>>>>>>>>> ", res)
        ending_page_index = res["pagination"]["total_pages"]
        if ending_page_index > 5:
            ending_page_index = 5  # a free Apollo account can only scrape 5 pages
name = []
first_name = []
email = []
company = []
website_link = []
company_size = []
job_title = []
city = []
country = []
linkedin_profile = []
phone_number = []
# seo_description = []
# industry = []
# seo_keywords = []
# technology = []
df_data = {
"Name": name,
"First name": first_name,
"email": email,
"Company": company,
"Website Link": website_link,
"Job Title": job_title,
"City": city,
"Country": country,
"Linkedin Profile": linkedin_profile,
"Phone Number": phone_number,
# "Company Size": company_size,
# "Industry": industry,
# "SEO description": seo_description,
# "SEO keywords": seo_keywords,
# "Technology": technology,
}
        # Page through the search results, collecting one row per person
        for page in range(1, ending_page_index + 1):
            data["page"] = page
            res = get_mixed_people(data, api_key)
            if res is None:
                continue
            people = res["people"]
            print("Total People : ", len(people))
            for person in people:
                print("Name > ", person.get("name"))
                name.append(person.get("name"))
                first_name.append(person.get("first_name"))
                # "organization" can be missing, so fall back to an empty dict
                organization = person.get("organization") or {}
                company.append(organization.get("name"))
                website_link.append(organization.get("website_url"))
                job_title.append(person.get("title"))
                city.append(person.get("city"))
                country.append(person.get("country"))
                linkedin_profile.append(person.get("linkedin_url"))

        # Enrich contacts in batches of 10 LinkedIn URLs per request
        chunks = [linkedin_profile[i:i + 10] for i in range(0, len(linkedin_profile), 10)]
        for chunk in chunks:
            print("Chunk size >>>>>>>> ", len(chunk))
            records = [{"linkedin_url": url} for url in chunk]
            person_email, person_number = get_person_contact(records, api_key)
            email.extend(person_email)
            phone_number.extend(person_number)
        # company_size / industry / SEO / technology columns are not populated yet
        # Dump the collected columns to JSON for inspection/debugging
        with open('saving_df.json', 'w') as json_file:
            json.dump(df_data, json_file, indent=4)  # indent=4 keeps the file readable

        df = pd.DataFrame(data=df_data)

        # Save as XLSX under a unique name so repeated runs don't collide
        os.makedirs("output_files", exist_ok=True)  # ensure the output dir exists
        unique_id = uuid.uuid4()
        xlsx_file = f"output_files/leads_{unique_id}.xlsx"
        df.to_excel(xlsx_file, index=False)
        print("=" * 70)
        return df, xlsx_file
    except Exception as e:
        print("Last API response: ", res)
        print(f"Exception | convert_json_to_files | {e}")
        traceback.print_exc()
        # Fall back to an empty DataFrame so callers can still unpack the result
        return pd.DataFrame(), None
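

# Minimal usage sketch. The payload keys below are illustrative guesses at
# Apollo-style search filters; the real filter names depend on what
# get_mixed_people forwards to the API, and the API key is a placeholder.
if __name__ == "__main__":
    sample_payload = json.dumps({
        "person_titles": ["marketing manager"],  # assumed filter field
        "person_locations": ["United States"],   # assumed filter field
        "page": 1,
    })
    df, xlsx_path = convert_json_to_files("YOUR_APOLLO_API_KEY", sample_payload)
    print(df.head())
    print("Saved XLSX to:", xlsx_path)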