import json
import os
import traceback
import uuid

import pandas as pd

from apollo_apis import get_mixed_people, get_person_contact
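
# NOTE: apollo_apis is a local module not shown here. The signatures below are
# assumptions inferred from how the helpers are used in this file:
#   get_mixed_people(payload: dict, api_key: str) -> dict | None
#       expected to return Apollo's people-search response, including
#       "pagination" ({"page", "total_pages", ...}) and "people" keys.
#   get_person_contact(records: list[dict], api_key: str) -> tuple[list, list]
#       expected to return (emails, phone_numbers), one entry per input record.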
def convert_json_to_files(api_key, payload):
    """Run an Apollo people search, enrich contacts in batches, and save the
    results as a JSON dump and an XLSX file. Returns (DataFrame, xlsx_path)."""
    res = None  # kept in scope so the except block can log the last response
    try:
        # Parse the input JSON payload into the search-filter dict
        data = json.loads(payload)
        res = get_mixed_people(data, api_key)
        print("RES >>>>>>>>>>>>>>> ", res)
        ending_page_index = res["pagination"]["total_pages"]
        if ending_page_index > 5:
            ending_page_index = 5  # a free Apollo account can only scrape 5 pages
name = []
first_name = []
email = []
company = []
website_link = []
company_size = []
job_title = []
city = []
country = []
linkedin_profile = []
phone_number = []
# seo_description = []
# industry = []
# seo_keywords = []
# technology = []
df_data = {
"Name": name,
"First name": first_name,
"email": email,
"Company": company,
"Website Link": website_link,
"Job Title": job_title,
"City": city,
"Country": country,
"Linkedin Profile": linkedin_profile,
"Phone Number": phone_number,
# "Company Size": company_size,
# "Industry": industry,
# "SEO description": seo_description,
# "SEO keywords": seo_keywords,
# "Technology": technology,
}
        # Page through the search results, collecting one row per person
        for page in range(1, ending_page_index + 1):
            data["page"] = page
            res = get_mixed_people(data, api_key)
            if res is None:
                continue
            people = res["people"]
            print("Total People : ", len(people))
            for person in people:
                print("Name > ", person.get("name"))
                name.append(person.get("name"))
                first_name.append(person.get("first_name"))
                # "organization" can be missing, so fall back to an empty dict
                organization = person.get("organization") or {}
                company.append(organization.get("name"))
                website_link.append(organization.get("website_url"))
                job_title.append(person.get("title"))
                city.append(person.get("city"))
                country.append(person.get("country"))
                linkedin_profile.append(person.get("linkedin_url"))

        # Enrich contacts in batches of 10 LinkedIn URLs per request
        chunks = [linkedin_profile[i:i + 10] for i in range(0, len(linkedin_profile), 10)]
        for chunk in chunks:
            print("Chunk size >>>>>>>> ", len(chunk))
            records = [{"linkedin_url": url} for url in chunk]
            person_email, person_number = get_person_contact(records, api_key)
            email.extend(person_email)
            phone_number.extend(person_number)
        # company_size / industry / SEO / technology columns are not populated yet
        # Dump the collected columns to JSON for inspection/debugging
        with open('saving_df.json', 'w') as json_file:
            json.dump(df_data, json_file, indent=4)  # indent=4 keeps the file readable

        df = pd.DataFrame(data=df_data)

        # Save as XLSX under a unique name so repeated runs don't collide
        os.makedirs("output_files", exist_ok=True)  # ensure the output dir exists
        unique_id = uuid.uuid4()
        xlsx_file = f"output_files/leads_{unique_id}.xlsx"
        df.to_excel(xlsx_file, index=False)
        print("=" * 70)
        return df, xlsx_file
    except Exception as e:
        print("Last API response: ", res)
        print(f"Exception | convert_json_to_files | {e}")
        traceback.print_exc()
        # Fall back to an empty DataFrame so callers can still unpack the result
        return pd.DataFrame(), None
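

# Minimal usage sketch. The payload keys below are illustrative guesses at
# Apollo-style search filters; the real filter names depend on what
# get_mixed_people forwards to the API, and the API key is a placeholder.
if __name__ == "__main__":
    sample_payload = json.dumps({
        "person_titles": ["marketing manager"],  # assumed filter field
        "person_locations": ["United States"],   # assumed filter field
        "page": 1,
    })
    df, xlsx_path = convert_json_to_files("YOUR_APOLLO_API_KEY", sample_payload)
    print(df.head())
    print("Saved XLSX to:", xlsx_path)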