Pipeline run
eb569f66-0751-45b7-ad10-b9c1d998e7ce
Client output enrichment
v2 Skill cluster · Nature of work · AI index · Tech stack maturity · Evidence · KRA description | vocab breakdown (legacy)
Signals
Post-classification
Captured for admin review
1 POST /skills/extract-from-jd
2 POST /skills/extract-details
3 POST /skills/final-role-output
Data Engineer
CASE A · slug: data-engineer · id: 2 · source: db
Exact alias hit on data-engineer (1.0) — no other alias at this confidence; skill_top absent does not contradict
Resolution:
in_db
— role exists in library; skill↔dim and role↔dim links saved when applicable.
Job description
Greetings from TCS!!!!!! We have an opportunity for Big Data Major Skill: Pyspark and Scala Experience: 4+ Years Work Mode: Work from office Location: Chennai/Mumbai/Pune/ Jd: Ingest data from disparate sources (Structured, unstructured and semi-structured) and develop ETL jobs using the above skills. Do impact analysis and come up with estimates Take responsibility for end-to-end deliverable. Create Project Plan & Work on Implementation Strategy Need to have comprehensive understanding on ETL concepts and Cross Environment Data Transfers Need to Handle Customer Communications and Management Reporting
Skills from this JD
Each row merges API 1 extraction, API 2 library match / v3 orchestration (dimensions + locked dims), and API 3 persistence tags.
Skill enrichment (orchestrator / LLM)
No Stage 7 enrichment blob on this skill (orchestrator skipped enrichment).
- Category
- Data Engineering Tools
- Sub-category
- general
- Skill nature
- CONCEPT
- Volatility
- MEDIUM
- Typical lifespan
- MULTI_YEAR
- Version strategy
- UNVERSIONED
Library artifacts (this run)
| Kind | Detail | DB id |
|---|---|---|
| canonical_skill_proposed | ETL \| type=Data Engineering Tools subtype=general nature=CONCEPT lifespan=MULTI_YEAR | |
nano JD Parser — gpt-4.1-nano click to toggle
Show raw JSON
{
"JD_type": "pass",
"about_company": null,
"certifications": [],
"company_name": "TCS",
"ctc": null,
"domain": {
"primary": {
"aliases": [],
"domain": "Other"
},
"secondary": null
},
"education": [],
"experience": {
"max": null,
"min": 4,
"raw": "4+ Years"
},
"job_locations": [
{
"aliases": [
"Madras"
],
"city": "Chennai",
"country": "India",
"state": "Tamil Nadu",
"work_mode": "onsite"
},
{
"aliases": [],
"city": "Mumbai",
"country": "India",
"state": "Maharashtra",
"work_mode": "onsite"
},
{
"aliases": [],
"city": "Pune",
"country": "India",
"state": "Maharashtra",
"work_mode": "onsite"
}
],
"role": "Big Data",
"role_aliases": [
"Big Data Engineer",
"Data Engineer"
],
"role_archetype": "Data",
"roles_and_responsibilities": [
{
"bullet_count": 0,
"heading": "JD",
"heading_was_present": true,
"source_marker": {
"first_5_words": "Ingest data from disparate sources",
"last_5_words": "and Management Reporting"
},
"text": "Ingest data from disparate sources (Structured, unstructured and semi-structured) and develop ETL jobs using the above skills.\nDo impact analysis and come up with estimates\nTake responsibility for end-to-end deliverable.\nCreate Project Plan \u0026 Work on Implementation Strategy\nNeed to have comprehensive understanding on ETL concepts and Cross Environment Data Transfers\nNeed to Handle Customer Communications and Management Reporting",
"word_count": 54
}
],
"urls": []
}
API 1 — extract-from-jd click to toggle
{
"final_skills": [
{
"is_primary": true,
"skill_name": "ETL"
}
],
"jd_role": {
"display_name": "Big Data",
"rationale": null,
"role_aliases": [
"Big Data Engineer",
"Data Engineer"
],
"role_archetype": "Data",
"slug": ""
},
"nano_parsed": {
"JD_type": "pass",
"about_company": null,
"certifications": [],
"company_name": "TCS",
"ctc": null,
"domain": {
"primary": {
"aliases": [],
"domain": "Other"
},
"secondary": null
},
"education": [],
"experience": {
"max": null,
"min": 4,
"raw": "4+ Years"
},
"job_locations": [
{
"aliases": [
"Madras"
],
"city": "Chennai",
"country": "India",
"state": "Tamil Nadu",
"work_mode": "onsite"
},
{
"aliases": [],
"city": "Mumbai",
"country": "India",
"state": "Maharashtra",
"work_mode": "onsite"
},
{
"aliases": [],
"city": "Pune",
"country": "India",
"state": "Maharashtra",
"work_mode": "onsite"
}
],
"role": "Big Data",
"role_aliases": [
"Big Data Engineer",
"Data Engineer"
],
"role_archetype": "Data",
"roles_and_responsibilities": [
{
"bullet_count": 0,
"heading": "JD",
"heading_was_present": true,
"source_marker": {
"first_5_words": "Ingest data from disparate sources",
"last_5_words": "and Management Reporting"
},
"text": "Ingest data from disparate sources (Structured, unstructured and semi-structured) and develop ETL jobs using the above skills.\nDo impact analysis and come up with estimates\nTake responsibility for end-to-end deliverable.\nCreate Project Plan \u0026 Work on Implementation Strategy\nNeed to have comprehensive understanding on ETL concepts and Cross Environment Data Transfers\nNeed to Handle Customer Communications and Management Reporting",
"word_count": 54
}
],
"urls": []
},
"rejected": false,
"rejection_reason": null,
"run_id": "eb569f66-0751-45b7-ad10-b9c1d998e7ce",
"stage3_signals": {
"alias_found": true,
"alias_match_roles": [
{
"display_name": "Data Engineer",
"kra_matches": null,
"matched_count": null,
"matched_skills": null,
"role_id": 2,
"score": 1.0,
"slug": "data-engineer",
"total_count": null
}
],
"kra_match_roles": [
{
"display_name": "Data Engineer",
"kra_matches": [
{
"kra_text": "Builds data ingestion pipelines to collect data from transactional databases, third-party APIs, event streams, and file sources into centralized data platforms.",
"sentence": "Ingest data from disparate sources (Structured, unstructured and semi-structured) and develop ETL jobs using the above skills.",
"similarity": 0.6243
},
{
"kra_text": "Works with data analysts, data scientists, and business stakeholders to define data models, ingestion schedules, and data delivery requirements.",
"sentence": "Need to have comprehensive understanding on ETL concepts and Cross Environment Data Transfers",
"similarity": 0.5135
},
{
"kra_text": "Works with data analysts, data scientists, and business stakeholders to define data models, ingestion schedules, and data delivery requirements.",
"sentence": "Need to Handle Customer Communications and Management Reporting",
"similarity": 0.3472
}
],
"matched_count": null,
"matched_skills": null,
"role_id": 2,
"score": 0.495,
"slug": "data-engineer",
"total_count": null
},
{
"display_name": "Engineering Manager",
"kra_matches": [
{
"kra_text": "Set team goals and delivery plans",
"sentence": "Create Project Plan \u0026 Work on Implementation Strategy",
"similarity": 0.5284
},
{
"kra_text": "Set team goals and delivery plans",
"sentence": "Take responsibility for end-to-end deliverable.",
"similarity": 0.506
},
{
"kra_text": "Set team goals and delivery plans",
"sentence": "Do impact analysis and come up with estimates",
"similarity": 0.3899
}
],
"matched_count": null,
"matched_skills": null,
"role_id": 121,
"score": 0.4748,
"slug": "engineering-manager",
"total_count": null
},
{
"display_name": "Pega Developer",
"kra_matches": [
{
"kra_text": "Requirements analysis and process translation",
"sentence": "Do impact analysis and come up with estimates",
"similarity": 0.4462
},
{
"kra_text": "Requirements analysis and process translation",
"sentence": "Need to have comprehensive understanding on ETL concepts and Cross Environment Data Transfers",
"similarity": 0.4414
},
{
"kra_text": "business rule configuration and maintenance",
"sentence": "Need to Handle Customer Communications and Management Reporting",
"similarity": 0.3975
}
],
"matched_count": null,
"matched_skills": null,
"role_id": 24,
"score": 0.4284,
"slug": "pega-developer",
"total_count": null
},
{
"display_name": "Svelte Frontend Developer",
"kra_matches": [
{
"kra_text": "backend data integration",
"sentence": "Ingest data from disparate sources (Structured, unstructured and semi-structured) and develop ETL jobs using the above skills.",
"similarity": 0.4532
},
{
"kra_text": "backend data integration",
"sentence": "Need to have comprehensive understanding on ETL concepts and Cross Environment Data Transfers",
"similarity": 0.4467
},
{
"kra_text": "backend data integration",
"sentence": "Need to Handle Customer Communications and Management Reporting",
"similarity": 0.3706
}
],
"matched_count": null,
"matched_skills": null,
"role_id": 92,
"score": 0.4235,
"slug": "svelte-frontend-developer",
"total_count": null
},
{
"display_name": "Java Backend Developer",
"kra_matches": [
{
"kra_text": "persistence and data modeling",
"sentence": "Need to have comprehensive understanding on ETL concepts and Cross Environment Data Transfers",
"similarity": 0.4672
},
{
"kra_text": "persistence and data modeling",
"sentence": "Ingest data from disparate sources (Structured, unstructured and semi-structured) and develop ETL jobs using the above skills.",
"similarity": 0.4127
},
{
"kra_text": "Server-side business logic implementation",
"sentence": "Need to Handle Customer Communications and Management Reporting",
"similarity": 0.378
}
],
"matched_count": null,
"matched_skills": null,
"role_id": 79,
"score": 0.4193,
"slug": "java-backend-developer",
"total_count": null
}
],
"skill_match_roles": []
},
"stage4_decision": {
"alias_collision_detected": false,
"case": "A",
"chosen_role": {
"display_name": "Data Engineer",
"kra_matches": null,
"matched_count": null,
"matched_skills": null,
"role_id": 2,
"score": 1.0,
"slug": "data-engineer",
"total_count": null
},
"confidence": 1.0,
"is_new_role": false,
"llm2_fired": false,
"llm2_reasoning": null,
"matched_dimensions": [],
"matched_kras": [],
"matched_skills": [],
"new_role_display_name": null,
"new_role_slug": null,
"queued": false,
"reasoning": "Exact alias hit on data-engineer (1.0) \u2014 no other alias at this confidence; skill_top absent does not contradict",
"sub_role": null
},
"stage5_updates": {
"centroid_n_after": 256,
"centroid_updated": true,
"collision_log_id": null,
"new_kra_attached": null,
"new_skills_attached": [
{
"is_primary": true,
"queue_id": 12807,
"role_display_name": "Data Engineer",
"role_slug": "data-engineer",
"skill_name": "ETL",
"status": "pending"
}
],
"queue_entry_id": null,
"v3_pipeline_triggered": false,
"v3_role_slug": null,
"v3_run_id": null
}
}
API 2 — extract-details
{
"alias_matches": [],
"candidate_roles": [],
"chosen_role": {
"display_name": "Data Engineer",
"id": 2,
"rationale": "Exact alias hit on data-engineer (1.0) \u2014 no other alias at this confidence; skill_top absent does not contradict",
"role_archetype": null,
"slug": "data-engineer",
"source": "db"
},
"dimensions": [],
"input_final_skills": [
"ETL"
],
"input_llm_skills": [
"ETL"
],
"new_aliases_persisted": 0,
"run_id": "eb569f66-0751-45b7-ad10-b9c1d998e7ce",
"skills_detail": [
{
"aliases_in_db": [],
"canonical": null,
"dimensions": [],
"input_skill": "ETL",
"matched_via": null,
"new_alias_persisted": false,
"new_alias_text": null,
"new_skill_meta": {
"derived": {
"category": "Data Engineering Tools",
"skill_nature": "CONCEPT",
"sub_category": "general",
"typical_lifespan": "MULTI_YEAR",
"version_strategy": "UNVERSIONED",
"volatility": "MEDIUM"
},
"enrichment": null,
"keep_log": [],
"locked_dimensions": [],
"merge_log": [],
"placed": null,
"relationships": null,
"skill_id": "etl",
"split_log": [],
"typed": null,
"warnings": []
},
"source_tag": "llm",
"was_in_llm_skills": true
}
],
"unmatched_skills": [
"ETL"
]
}
API 3 — final-role-output
{
"chosen_role": {
"display_name": "Data Engineer",
"id": 2,
"rationale": "Exact alias hit on data-engineer (1.0) \u2014 no other alias at this confidence; skill_top absent does not contradict",
"role_archetype": null,
"slug": "data-engineer",
"source": "db"
},
"chosen_role_resolution": "in_db",
"final_input_skills": [
{
"skill": "ETL",
"tag": "new"
}
],
"llm_cost_api1_usd": null,
"llm_cost_api2_usd": null,
"llm_cost_api3_usd": null,
"llm_cost_total_usd": null,
"persistence": {
"items": [],
"new_skills_created": 0,
"role_dimension_saved": 0,
"skill_dimension_saved": 0,
"skipped": 0
},
"planner_output": null,
"run_id": "eb569f66-0751-45b7-ad10-b9c1d998e7ce"
}
LLM Calls
Every model call made for this run, in pipeline order. Click a card to see the model's response.