-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagent_modern.py
More file actions
233 lines (187 loc) · 8.07 KB
/
agent_modern.py
File metadata and controls
233 lines (187 loc) · 8.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#!/usr/bin/env python3
"""
Modern LinkedIn Profile Analyzer Agent
Uses advanced AI and modern scraping techniques to analyze LinkedIn profiles.
No external API dependencies - fully self-contained.
"""
import os
import json
from typing import Dict, Any
from langchain import hub
from langchain.agents import create_react_agent, AgentExecutor
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
# Import our modern scraping tools
from scraper_modern import scraper_tool
from linkedin_url import linkedin_url
# Load environment variables from a local .env file (API keys, etc.).
load_dotenv()
# Enable LangSmith tracing only when an API key is present (optional feature);
# these env vars are read by LangChain internally.
if os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
class ModernLinkedInAgent:
    """
    Modern LinkedIn profile analyzer built on a LangChain ReAct agent.

    Combines a chat LLM with two tools (a LinkedIn URL finder and a
    profile scraper) and post-processes the agent's free-form output
    into a stable dict with a fixed set of keys.
    """

    def __init__(self, model: str = "gpt-4", temperature: float = 0.1):
        """Initialize the agent with the specified model and temperature.

        Args:
            model: OpenAI chat model name passed to ChatOpenAI.
            temperature: Sampling temperature (low = more deterministic).
        """
        self.llm = ChatOpenAI(temperature=temperature, model_name=model)
        self.tools = [linkedin_url, scraper_tool]
        # Pull the standard ReAct prompt from the LangChain hub.
        # NOTE(review): this is a network call made at construction time.
        react_prompt = hub.pull("hwchase17/react")
        self.agent = create_react_agent(
            llm=self.llm,
            tools=self.tools,
            prompt=react_prompt,
        )
        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            max_iterations=5,  # bound tool-use loops to cap cost/latency
            handle_parsing_errors=True,
        )

    def analyze_profile(self, name: str) -> Dict[str, Any]:
        """
        Analyze a LinkedIn profile and generate insights.

        Args:
            name: Full name of the person to analyze.

        Returns:
            Dictionary with analysis results; on failure, an error-shaped
            dict from _create_error_response (never raises).
        """
        # Enhanced prompt for better results
        prompt = f"""
You are an expert LinkedIn profile analyzer. Your task is to find and analyze the LinkedIn profile for "{name}".
Follow these steps:
1. First, use the linkedin_url tool to find the LinkedIn profile URL for "{name}"
2. Then, use the Modern LinkedIn Scraper tool to extract profile data from the URL
3. Analyze the extracted data and generate insights
Your final output must be a valid JSON object with these exact keys:
- "summary": A professional 2-3 sentence summary highlighting key career aspects
- "interesting_facts": An array of exactly 2 interesting facts about the person
- "profile_pic_url": The profile picture URL (if available)
- "full_name": The person's full name as found on LinkedIn
- "headline": Their current professional headline
Important guidelines:
- Base your analysis only on the actual scraped data
- If scraping fails, acknowledge this in your response
- Keep facts professional and relevant
- Ensure the JSON is properly formatted
- Do not make up information not found in the profile
Begin your analysis now.
"""
        try:
            # Execute the agent; keep the try narrow to the agent call
            # and output parsing, which are the only things that can fail.
            result = self.agent_executor.invoke({"input": prompt})
            output = result.get("output", "")
            return self._parse_agent_output(output, name)
        except Exception as e:
            # Agent/tool failures are reported as a structured response
            # rather than propagated, so callers always get the schema.
            return self._create_error_response(str(e), name)

    def _parse_agent_output(self, output: str, name: str) -> Dict[str, Any]:
        """Parse and validate agent output.

        Attempts to extract the outermost {...} span as JSON; falls back
        to wrapping the raw text when no valid JSON is found.
        """
        if not output or not output.strip():
            return self._create_error_response("Agent returned empty response", name)
        # Heuristic JSON extraction: first '{' through last '}'.
        json_start = output.find('{')
        json_end = output.rfind('}') + 1
        if json_start >= 0 and json_end > json_start:
            json_str = output[json_start:json_end]
            try:
                parsed_data = json.loads(json_str)
                return self._validate_and_clean_response(parsed_data, name)
            except json.JSONDecodeError:
                pass  # fall through to the text-based fallback
        # If no valid JSON found, create structured response from text.
        return self._create_structured_response_from_text(output, name)

    def _validate_and_clean_response(self, data: Dict, name: str) -> Dict[str, Any]:
        """Coerce a parsed agent response into the expected schema.

        Guarantees all five output keys exist, that falsy/missing values
        are replaced with sensible defaults, and that interesting_facts
        is a list of exactly 2 items.
        """
        # `or` (not .get default) so explicit None/"" values also fall back.
        cleaned = {
            "full_name": data.get("full_name") or name,
            "headline": data.get("headline") or "LinkedIn Professional",
            "summary": data.get("summary") or "Profile analysis completed",
            "profile_pic_url": data.get("profile_pic_url") or "",
        }
        facts = data.get("interesting_facts") or ["Analysis completed successfully"]
        # Ensure interesting_facts is a list
        if not isinstance(facts, list):
            facts = [str(facts)]
        # Normalize to exactly 2 facts. (Bug fix: the previous single
        # `elif ... append` could still leave fewer than 2 facts when
        # the agent returned an empty list.)
        facts = facts[:2]
        while len(facts) < 2:
            facts.append("Professional with LinkedIn presence")
        cleaned["interesting_facts"] = facts
        return cleaned

    def _create_structured_response_from_text(self, text: str, name: str) -> Dict[str, Any]:
        """Create a schema-conforming response from unstructured text.

        Used when the agent's output contained no parseable JSON; the
        raw text (truncated to 200 chars) becomes the summary.
        """
        return {
            "full_name": name,
            "headline": "LinkedIn Professional",
            "summary": text[:200] + "..." if len(text) > 200 else text,
            "interesting_facts": [
                "Profile information extracted successfully",
                "Analysis completed using modern scraping techniques"
            ],
            "profile_pic_url": ""
        }

    def _create_error_response(self, error_msg: str, name: str) -> Dict[str, Any]:
        """Create an error response in the expected schema.

        Includes an extra "error" key so callers can distinguish
        failures from successful analyses.
        """
        return {
            "full_name": name,
            "headline": "Profile Analysis Error",
            "summary": f"Unable to analyze profile: {error_msg}",
            "interesting_facts": [
                "Profile may be private or require authentication",
                "Try again later or check the profile URL manually"
            ],
            "profile_pic_url": "",
            "error": error_msg
        }
# Shared agent instance, created lazily (see _get_default_agent).
# Bug fix: the previous eager `ModernLinkedInAgent()` here performed a
# network call (hub.pull) and required API keys at import time, making
# the module un-importable in offline/test environments.
_agent = None


def _get_default_agent() -> "ModernLinkedInAgent":
    """Return the shared agent, constructing it on first use."""
    global _agent
    if _agent is None:
        _agent = ModernLinkedInAgent()
    return _agent


def generate_profile_summary_and_facts_single_step(name: str) -> str:
    """
    Generate LinkedIn profile analysis for a given name.

    Args:
        name: Full name of the person to analyze

    Returns:
        JSON string with analysis results
    """
    result = _get_default_agent().analyze_profile(name)
    return json.dumps(result, indent=2)
def analyze_linkedin_profile(name: str, model: str = "gpt-4") -> Dict[str, Any]:
    """
    Analyze a LinkedIn profile and return structured data.

    Builds a fresh agent for the requested model, then delegates the
    whole analysis to it.

    Args:
        name: Full name of the person to analyze
        model: AI model to use for analysis

    Returns:
        Dictionary with analysis results
    """
    return ModernLinkedInAgent(model=model).analyze_profile(name)
# Example usage and testing
if __name__ == "__main__":
    print("🤖 Modern LinkedIn Profile Analyzer")
    print("=" * 40)

    # Interactive CLI: keep prompting until the user asks to quit.
    while True:
        entered = input("\nEnter full name (or 'quit' to exit): ").strip()
        if entered.lower() in ("quit", "exit", "q"):
            print("👋 Goodbye!")
            break
        if not entered:
            print("❌ Please enter a valid name")
            continue

        print(f"\n🔍 Analyzing profile for: {entered}")
        print("⏳ This may take a moment...")
        try:
            analysis = generate_profile_summary_and_facts_single_step(entered)
        except Exception as e:
            print(f"\n❌ Error: {e}")
        else:
            print("\n📊 Analysis Results:")
            print(analysis)
        print("\n" + "="*50)