Skip to content

Commit

Permalink
Merge pull request #1144 from aparupganguly/feature/o3-mini-job-resource
Browse files Browse the repository at this point in the history
Feature/o3 mini job resource extractor
  • Loading branch information
ericciarla authored Feb 7, 2025
2 parents ac5c88b + 46f05a7 commit 2b7b740
Showing 1 changed file with 265 additions and 0 deletions.
265 changes: 265 additions & 0 deletions examples/job-resource-analyzer/job-resources-analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
import os
import json
import time
import requests
from dotenv import load_dotenv
from openai import OpenAI
from serpapi.google_search import GoogleSearch

class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    # Foreground colors.
    CYAN = "\033[96m"
    YELLOW = "\033[93m"
    GREEN = "\033[92m"
    RED = "\033[91m"

    # Emitted after colored text to return to the terminal default.
    RESET = "\033[0m"

# Must run before the os.getenv() calls below so that values supplied through
# a .env file are visible in the environment when the keys are read.
load_dotenv()

# Initialize clients
# OpenAI client used by rank_and_summarize_resources.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Keys for Firecrawl (job extraction) and SerpAPI (resource search).
# os.getenv returns None when the variable is not set.
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
serp_api_key = os.getenv("SERP_API_KEY")

def extract_job_requirements(url, api_key):
    """Extract essential job requirements using Firecrawl.

    Submits an extraction job for ``url`` to the Firecrawl ``/v1/extract``
    endpoint and then polls for its result via ``poll_extraction_result``.

    Args:
        url: URL of the job posting to analyze.
        api_key: Firecrawl API key, sent as a bearer token.

    Returns:
        Whatever ``poll_extraction_result`` returns for the submitted job,
        or ``None`` when the request fails, the response is not a JSON
        object, the API does not report success, or no job id is returned.
    """
    print(f"{Colors.YELLOW}Extracting job requirements...{Colors.RESET}")

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}'
    }

    prompt = (
        "\n"
        "Extract only:\n"
        "- job_title: position title (string)\n"
        "- required_skills: top 5 technical skills (array)\n"
        "- experience_level: years required (string)\n"
    )

    payload = {
        "urls": [url],
        "prompt": prompt,
        "enableWebSearch": False
    }

    try:
        response = requests.post(
            "https://api.firecrawl.dev/v1/extract",
            headers=headers,
            json=payload,
            timeout=30
        )
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network/timeout failures; ValueError covers
        # a response body that is not valid JSON.
        print(f"{Colors.RED}Error extracting job requirements: {str(e)}{Colors.RESET}")
        return None

    if not isinstance(data, dict) or not data.get('success'):
        # Surface the failure instead of returning None silently.
        reason = data.get('error') if isinstance(data, dict) else None
        print(
            f"{Colors.RED}Error extracting job requirements: "
            f"{reason or 'extraction request was rejected'}{Colors.RESET}"
        )
        return None

    extraction_id = data.get('id')
    if not extraction_id:
        # Without an id the poller would query ".../extract/None".
        print(f"{Colors.RED}Error extracting job requirements: no job id returned{Colors.RESET}")
        return None

    return poll_extraction_result(extraction_id, api_key)

def poll_extraction_result(extraction_id, api_key, interval=5, max_attempts=12):
    """Poll for extraction results.

    Repeatedly requests ``/v1/extract/{extraction_id}`` until the response
    reports success with non-empty data, the job reaches a terminal failure
    status, or the attempt budget is exhausted.

    Args:
        extraction_id: Id of the extraction job to query.
        api_key: Firecrawl API key, sent as a bearer token.
        interval: Seconds to wait between attempts.
        max_attempts: Maximum number of requests to make.

    Returns:
        The ``data`` field of the first successful response, or ``None`` if
        no result became available.
    """
    url = f"https://api.firecrawl.dev/v1/extract/{extraction_id}"
    headers = {'Authorization': f'Bearer {api_key}'}

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.get(url, headers=headers, timeout=30)
            data = response.json()
        except (requests.RequestException, ValueError):
            print(f"{Colors.YELLOW}Polling attempt failed, retrying...{Colors.RESET}")
        else:
            if isinstance(data, dict):
                if data.get('success') and data.get('data'):
                    return data['data']
                # NOTE(review): 'failed'/'cancelled' are taken from the
                # Firecrawl extract-status docs; once reached, further
                # polling cannot produce a result.
                if data.get('status') in ('failed', 'cancelled'):
                    print(
                        f"{Colors.RED}Extraction job ended with status "
                        f"'{data.get('status')}'.{Colors.RESET}"
                    )
                    return None

        # Wait between attempts on both the "not ready" and the error path
        # (a failed request must not be retried in a tight loop), but not
        # after the final attempt.
        if attempt < max_attempts:
            time.sleep(interval)
    return None

def rank_and_summarize_resources(resources, skills):
    """Use OpenAI to rank and summarize learning resources.

    Args:
        resources: Mapping of category name to a list of dicts, each with
            ``"title"`` and ``"url"`` keys.
        skills: Skill names the resources should be judged against.

    Returns:
        The ``"ranked_resources"`` list parsed from the model's JSON reply,
        an empty list when ``resources`` contains no items, or ``None`` if
        building the request, calling the model, or parsing its reply fails.
    """
    try:
        # Flatten the per-category lists into one list for the prompt.
        all_resources = []
        for category, items in resources.items():
            for item in items:
                all_resources.append({
                    "category": category,
                    "title": item["title"],
                    "url": item["url"]
                })

        # Nothing to rank: skip the model call entirely.
        if not all_resources:
            return []

        skills_str = ", ".join(skills)
        prompt = (
            f"Given these learning resources for skills ({skills_str}),\n"
            "rank them by relevance and quality, and provide a brief summary:\n"
            "Resources:\n"
            f"{json.dumps(all_resources, indent=2)}\n"
            "For each resource, provide:\n"
            "1. Relevance score (1-10)\n"
            "2. Brief summary (max 2 sentences)\n"
            "3. Why it's useful for the target skills\n"
            "Format as JSON with structure:\n"
            "{\n"
            '"ranked_resources": [\n'
            "{\n"
            '"category": "...",\n'
            '"title": "...",\n'
            '"url": "...",\n'
            '"relevance_score": X,\n'
            '"summary": "...",\n'
            '"usefulness": "..."\n'
            "}\n"
            "]\n"
            "}"
        )

        response = client.chat.completions.create(
            model="o3-mini",
            messages=[
                {"role": "system", "content": "You are a technical learning resource curator."},
                {"role": "user", "content": prompt}
            ],
        )

        # The reply is free text, so it may arrive wrapped in a Markdown
        # code fence; remove that before parsing.
        reply = _strip_code_fence(response.choices[0].message.content)
        ranked_data = json.loads(reply)
        return ranked_data["ranked_resources"]

    except Exception as e:
        # Boundary handler: ranking is best-effort, so any failure (API
        # error, malformed reply, missing key) degrades to "no ranking".
        print(f"{Colors.RED}Error in ranking resources: {str(e)}{Colors.RESET}")
        return None


def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if any."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped

    # Drop the opening fence line, which may carry a language tag.
    newline_at = stripped.find("\n")
    stripped = stripped[newline_at + 1:] if newline_at != -1 else stripped[3:]

    stripped = stripped.rstrip()
    if stripped.endswith("```"):
        stripped = stripped[:-3]
    return stripped.strip()

def get_prep_resources(skills):
    """Get and rank learning resources for top skills.

    Runs one Google search (through SerpAPI) for the first two skills,
    buckets the results by keywords found in their titles, and passes the
    buckets to ``rank_and_summarize_resources``.

    Args:
        skills: Skill names, most important first.

    Returns:
        The value returned by ``rank_and_summarize_resources``, an empty
        list when there are no skills or no search result could be
        categorized, or ``None`` if the search fails.
    """
    # No skills means no meaningful query; do not spend an API call on it.
    if not skills:
        return []

    try:
        core_resources = {
            "Tutorials": [],
            "Practice": [],
            "Documentation": []
        }

        # Search for top 2 skills to reduce API usage
        top_skills = skills[:2]
        search = GoogleSearch({
            "q": f"learn {' '.join(top_skills)} tutorial practice exercises documentation",
            "api_key": serp_api_key,
            "num": 6
        })
        results = search.get_dict().get("organic_results", [])

        for result in results[:6]:
            # "or ''" also covers keys that are present but null.
            url = result.get("link") or ""
            title = result.get("title") or ""
            if not url:
                # A resource without a link cannot be recommended.
                continue

            lowered = title.lower()
            if "tutorial" in lowered or "guide" in lowered:
                core_resources["Tutorials"].append({"title": title, "url": url})
            elif "practice" in lowered or "exercise" in lowered:
                core_resources["Practice"].append({"title": title, "url": url})
            elif "doc" in lowered or "reference" in lowered:
                core_resources["Documentation"].append({"title": title, "url": url})

        # Nothing was categorized, so there is nothing to rank.
        if not any(core_resources.values()):
            return []

        # Rank and summarize resources
        return rank_and_summarize_resources(core_resources, top_skills)

    except Exception as e:
        # Boundary handler: resource lookup is optional for the final guide.
        print(f"{Colors.RED}Error getting resources: {str(e)}{Colors.RESET}")
        return None

def generate_weekly_plan(skills):
    """Build a short week-by-week preparation plan.

    The plan always contains a fundamentals week, a projects week and an
    interview-prep week; an "Advanced Topics" week is inserted after the
    first one only when more than two skills are given.

    Returns a list of dicts with the keys "focus", "skills" and "tasks".
    """
    skill_count = len(skills)

    # Opening week: the first two skills (or all of them if fewer).
    fundamentals = {
        "focus": "Fundamentals",
        "skills": skills if skill_count < 2 else skills[:2],
        "tasks": ["Study core concepts", "Complete basic tutorials"],
    }
    plan = [fundamentals]

    # Optional second week: skills three and four.
    if skill_count > 2:
        advanced = {
            "focus": "Advanced Topics",
            "skills": skills[2:4],
            "tasks": ["Deep dive into advanced features", "Practice exercises"],
        }
        plan.append(advanced)

    # Closing weeks are the same regardless of the skill list.
    plan.extend([
        {
            "focus": "Projects",
            "skills": "All core skills",
            "tasks": ["Build small projects", "Solve practice problems"],
        },
        {
            "focus": "Interview Prep",
            "skills": "All skills",
            "tasks": ["Mock interviews", "Code reviews"],
        },
    ])
    return plan

def format_output(job_info, ranked_resources, weeks):
    """Format output in a concise way with ranked resources.

    Args:
        job_info: Dict with optional ``job_title``, ``experience_level`` and
            ``required_skills`` entries.
        ranked_resources: List of resource dicts (``title``, ``url``,
            ``relevance_score``, ``summary``, ``usefulness``), or a falsy
            value when no resources are available.
        weeks: Plan entries, each a dict with ``focus``, ``skills`` (a list
            or a plain string) and ``tasks``.

    Returns:
        The complete guide as one string containing ANSI color codes.
    """
    parts = [f"\n{Colors.GREEN}=== Job Preparation Guide ==={Colors.RESET}\n"]

    # Job Requirements
    parts.append(f"\n{Colors.CYAN}Position:{Colors.RESET} {job_info.get('job_title', 'N/A')}")
    parts.append(f"\n{Colors.CYAN}Experience:{Colors.RESET} {job_info.get('experience_level', 'N/A')}")
    parts.append(f"\n{Colors.CYAN}Key Skills:{Colors.RESET}")
    # "or []" also covers a required_skills entry that is present but null.
    for skill in job_info.get('required_skills') or []:
        parts.append(f"\n- {skill}")

    # Weekly Plan; the header reflects the actual number of weeks, which is
    # not always four.
    parts.append(f"\n\n{Colors.CYAN}{len(weeks)}-Week Plan:{Colors.RESET}")
    for i, week in enumerate(weeks, 1):
        week_skills = week['skills']
        if isinstance(week_skills, list):
            week_skills = ', '.join(map(str, week_skills))
        parts.append(f"\n\n📅 Week {i}: {week['focus']}")
        parts.append(f"\n Skills: {week_skills}")
        parts.append(f"\n Tasks: {' → '.join(week['tasks'])}")

    # Ranked Learning Resources. These come from a language model, so
    # entries may be malformed or lack fields; tolerate that rather than
    # lose the whole guide.
    usable = [res for res in ranked_resources or [] if isinstance(res, dict)]
    if usable:
        parts.append(f"\n\n{Colors.CYAN}Top Recommended Resources:{Colors.RESET}")

        # Sort resources by relevance score
        usable.sort(key=_relevance_score, reverse=True)

        for res in usable[:5]:  # Show top 5 resources
            parts.append(
                f"\n\n📚 {res.get('title', 'Untitled')} "
                f"(Score: {res.get('relevance_score', 'N/A')}/10)"
            )
            parts.append(f"\n {res.get('summary', 'N/A')}")
            parts.append(f"\n Why useful: {res.get('usefulness', 'N/A')}")
            parts.append(f"\n URL: {res.get('url', 'N/A')}")

    return "".join(parts)


def _relevance_score(resource):
    """Return the resource's relevance score as a float, or 0.0 if unusable."""
    try:
        return float(resource.get('relevance_score', 0))
    except (TypeError, ValueError):
        return 0.0

def main():
    """Main execution function.

    Prompts for a job posting URL, extracts the job requirements, looks up
    and ranks learning resources, builds the weekly plan, and prints the
    formatted guide. Errors are reported on the console instead of being
    raised.
    """
    try:
        job_url = input(f"{Colors.YELLOW}Enter job posting URL: {Colors.RESET}").strip()
        if not job_url:
            # Do not spend an extraction request on an empty URL.
            print(f"{Colors.RED}No job posting URL provided.{Colors.RESET}")
            return

        # Extract requirements
        job_info = extract_job_requirements(job_url, firecrawl_api_key)
        if not job_info:
            print(f"{Colors.RED}Failed to extract job requirements.{Colors.RESET}")
            return

        # "or []" also covers a required_skills entry that is present but
        # null, which would otherwise reach the helpers as None.
        skills = job_info.get('required_skills') or []

        # Get resources and generate plan
        print(f"{Colors.YELLOW}Finding and ranking preparation resources...{Colors.RESET}")
        resources = get_prep_resources(skills)
        weeks = generate_weekly_plan(skills)

        # Display results
        print(format_output(job_info, resources, weeks))

    except Exception as e:
        # Top-level boundary: report the problem rather than show a traceback.
        print(f"{Colors.RED}An error occurred: {str(e)}{Colors.RESET}")


# Run the interactive flow only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 comments on commit 2b7b740

Please sign in to comment.