-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJOBFINDER.py
More file actions
97 lines (81 loc) · 4.02 KB
/
JOBFINDER.py
File metadata and controls
97 lines (81 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import json
import PyPDF2
import textract
import re
import nltk
import requests
# Download NLTK punkt tokenizer
nltk.download('punkt')

# Accumulates the match records that are written out at the end of the script.
results = []

# Retrieve JSON data from the URL
url = "https://a856-136-233-9-98.ngrok-free.app/?location=india"
try:
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=30)
except requests.RequestException as e:
    # Connection failures, DNS errors and timeouts all land here.
    print("Error:", e)
    raise SystemExit(1)

if response.status_code != 200:
    print("Error:", response.status_code)
    # Exit the script if there's an error retrieving JSON data. SystemExit is
    # used instead of the interactive-only exit() helper, and the non-zero
    # status lets a calling process detect the failure.
    raise SystemExit(1)

try:
    data = response.json()
except ValueError as e:
    # A 200 response whose body is not valid JSON (every JSON decode error
    # raised by requests is a ValueError subclass).
    print("Error:", e)
    raise SystemExit(1)
# Open and extract text from the PDF
filename = r"D:\Users\Administrator\OneDrive - vit.ac.in\Desktop\Resume.pdf"
try:
    # The context manager closes the file handle even when parsing raises.
    with open(filename, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Concatenate the text of every page; `or ""` guards against a page
        # that yields no text at all.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # If text extraction failed, use alternative method (OCR) on the same
    # resume file rather than an unrelated placeholder document.
    if not text:
        ocr_bytes = textract.process(filename, method='tesseract', language='eng')
        # textract returns bytes; decode so the later regex search, which uses
        # str patterns, receives a str.
        text = ocr_bytes.decode('utf-8', errors='replace')
except Exception as e:
    # Best-effort boundary: any failure (missing file, corrupt PDF, OCR tool
    # unavailable) is reported and the script continues with no resume text.
    print("Error processing PDF:", e)
    text = ""  # Reset text to empty string
# Specific keywords to identify for skills for different job roles
skills_keywords = {
    'Machine Learning': ['Machine Learning', 'Deep Learning', 'Python', 'TensorFlow', 'PyTorch', 'Natural Language Processing', 'Computer Vision'],
    'Software Developer': ['Java', 'JavaScript', 'HTML', 'CSS', 'ReactJS', 'Node.js', 'SQL', 'RESTful APIs'],
    'Data Analyst': ['SQL', 'Data Analysis', 'Statistics', 'Data Visualization', 'Python', 'Pandas', 'NumPy', 'Matplotlib', 'Seaborn'],
    'Product Manager': ['Product Management', 'Agile', 'Scrum', 'User Experience Design', 'Market Research', 'Product Strategy'],
    'DevOps Engineer': ['Linux', 'Docker', 'Kubernetes', 'AWS', 'Azure', 'Jenkins', 'CI/CD'],
    'Cybersecurity Analyst': ['Cybersecurity', 'Network Security', 'Penetration Testing', 'Security Operations', 'Firewalls', 'Encryption'],
    'Full Stack Developer': ['JavaScript', 'HTML', 'CSS', 'ReactJS', 'Node.js', 'Python', 'Django', 'RESTful APIs', 'MongoDB', 'SQL'],
}

# The OCR fallback can leave `text` as bytes; searching bytes with a str
# pattern raises TypeError, so normalise to str before matching.
resume_text = text.decode('utf-8', errors='replace') if isinstance(text, bytes) else text

# Find specific keywords in the text for each job role.
# - Matching is case-insensitive so "python" or "Javascript" in a resume still
#   counts, consistent with the case-insensitive role/title comparison.
# - Lookarounds are used instead of \b so a keyword that starts or ends with
#   punctuation would still be bounded correctly; for the keywords above they
#   behave exactly like \b.
skills_found = {
    role: [
        word
        for word in keywords
        if re.search(r'(?<!\w)' + re.escape(word) + r'(?!\w)', resume_text, re.IGNORECASE)
    ]
    for role, keywords in skills_keywords.items()
}

# Calculate match rate for each role: percentage of the role's keywords that
# were found (0 for a role with no keywords, avoiding division by zero).
match_rates = {
    role: len(found_words) / len(skills_keywords[role]) * 100 if skills_keywords[role] else 0
    for role, found_words in skills_found.items()
}
# Initialize variables to store the details of the company with the highest match rate
best_company_name = ""
best_title = ""
best_match_role = ""
best_match_rate = 0

# Iterate through each company's data
for company_data in data:
    # Skip entries that are not JSON objects instead of crashing on .get().
    # NOTE(review): assumes `data` is a list of job dicts - confirm against the API.
    if not isinstance(company_data, dict):
        continue

    # `or ""` also covers an explicit JSON null, which .get()'s default does not.
    company_name = company_data.get("name") or ""  # Extract the company name
    title = company_data.get("title") or ""  # Extract the job title

    # Lower-case the title once per job rather than once per role.
    title_lower = str(title).lower()

    # Iterate through each role
    for role in skills_found:
        # Only roles whose name appears in the job title are considered.
        if role.lower() not in title_lower:
            continue

        # Look up the precomputed match rate for the role
        match_rate = match_rates.get(role, 0)
        print(match_rate)

        # Strictly-greater comparison: the first job reaching a given rate
        # wins ties, and a 0% match never replaces the empty defaults.
        if match_rate > best_match_rate:
            best_company_name = company_name
            best_title = title
            best_match_role = role
            best_match_rate = match_rate

# Append the details of the company with the highest match rate to the results list.
# This runs once, after the loop; if nothing matched, the empty defaults are recorded.
results.append({
    "company": best_company_name,
    "title": best_title,
    "best_match_role": best_match_role,
    "match_rate": best_match_rate,
})
# Wrap the collected results under a single top-level key and persist them
# as pretty-printed JSON in the current working directory.
output_data = {'results': results}
with open('skills.json', 'w') as out_fh:
    serialized = json.dumps(output_data, indent=2)
    out_fh.write(serialized)

# Confirmation message, printed once the file has been written and closed.
print('JSON data saved to skills.json')