18 changes: 18 additions & 0 deletions experimental/aitools/DESIGN.md
@@ -192,6 +192,24 @@ the system as a whole a bit (btw each tool should be defined in a separate .go f
- further implementation guidance: i want acceptance tests for each of these project types (app, dashboard, job, pipeline);
this means they should be exposed as a hidden command like 'databricks experimental aitools tool add_project_resource --json <json>'. having these tests will be instrumental for iterating on them; project initialization should not fail! note that the tool subcommand should just assume that the cwd is the current project dir.

- the "workspace_info" tool:
- description: Get information about Databricks workspaces. Call without parameters to list all available workspaces and get current workspace details. Call with a profile parameter to get detailed information about a specific workspace (warehouse, user, etc.).
- parameter: profile - optional workspace profile name. If provided, returns detailed information about that specific workspace. If omitted, lists all available workspaces and shows details for the current workspace.
- implementation:
- When called without parameters:
1. Shows current workspace details (profile, host, cloud, user, warehouse, catalog)
2. Lists all available workspace profiles with their URLs and cloud providers (if multiple exist)
3. Provides guidance on how to get details about other workspaces and how to use the --profile flag
- When called with a profile parameter:
1. Validates the profile exists in ~/.databrickscfg
2. Shows workspace URL and cloud provider
3. Gets current user via SCIM API
4. Gets default SQL warehouse using GetDefaultWarehouse()
5. Gets default Unity Catalog if available
- output: Formatted text with workspace information (profile, host, cloud, user, warehouse, catalog)
- code structure: a single workspace_info.go file with getWorkspaceDetails, listWorkspacesWithCurrent, getCurrentUser, and getDefaultCatalog helpers (an illustrative sketch follows this list)
- key use case: When user wants to know what workspaces they have access to, or get connection details for a specific workspace
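
- illustrative sketch (non-prescriptive): assuming the tool follows the same Tool/ToolDefinition/Handler pattern as the existing explore tool, and that getWorkspaceDetails and listWorkspacesWithCurrent are the helpers named above (each returning a formatted string and an error), the definition could look roughly like:

```go
// Sketch only — the final definition may differ.
var WorkspaceInfoTool = Tool{
	Definition: ToolDefinition{
		Name:        "workspace_info",
		Description: "Get information about Databricks workspaces. Call without parameters to list all workspaces; pass a profile to get details for one workspace.",
		InputSchema: map[string]any{
			"type": "object",
			"properties": map[string]any{
				"profile": map[string]any{
					"type":        "string",
					"description": "Optional workspace profile name from ~/.databrickscfg.",
				},
			},
		},
	},
	Handler: func(ctx context.Context, params map[string]any) (string, error) {
		// With a profile: detailed view of that workspace (host, cloud, user, warehouse, catalog).
		if p, ok := params["profile"].(string); ok && p != "" {
			return getWorkspaceDetails(ctx, p)
		}
		// Without a profile: current workspace details plus the list of available profiles.
		return listWorkspacesWithCurrent(ctx)
	},
}
```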

- the "explore" tool:
- description: CALL THIS FIRST when user mentions a workspace by name or asks about workspace resources. Shows available workspaces/profiles, default warehouse, and provides guidance on exploring jobs, clusters, catalogs, and other Databricks resources. Use this to discover what's available before running CLI commands.
- no parameters needed
1 change: 1 addition & 0 deletions experimental/aitools/server.go
@@ -66,6 +66,7 @@ type mcpServer struct {
// getAllTools returns all tools (definitions + handlers) for the MCP server.
func getAllTools() []tools.Tool {
return []tools.Tool{
tools.WorkspaceInfoTool,
tools.InvokeDatabricksCLITool,
tools.InitProjectTool,
tools.AnalyzeProjectTool,
13 changes: 12 additions & 1 deletion experimental/aitools/tools/analyze_project.go
@@ -2,6 +2,7 @@ package tools

import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
@@ -10,6 +11,7 @@ import (
"github.com/databricks/cli/experimental/aitools/auth"
"github.com/databricks/cli/experimental/aitools/tools/prompts"
"github.com/databricks/cli/experimental/aitools/tools/resources"
"github.com/databricks/cli/libs/log"
)

// AnalyzeProjectTool analyzes a Databricks project and returns guidance.
@@ -70,14 +72,23 @@ func AnalyzeProject(ctx context.Context, args analyzeProjectArgs) (string, error
string(content)
}

// Get current workspace context
currentProfile := getCurrentProfile(ctx)
workspaceContext, err := getWorkspaceDetails(ctx, currentProfile)
if err != nil {
log.Debugf(ctx, "Failed to get workspace details (non-fatal): %v", err)
workspaceContext = fmt.Sprintf("Current Profile: %s\n(Unable to load detailed workspace information)", currentProfile)
}

// Get default warehouse for apps and other resources that need it
warehouse, err := GetDefaultWarehouse(ctx)
warehouse, err := GetDefaultWarehouse(ctx, currentProfile)
resourceGuidance := getResourceGuidance(args.ProjectPath, warehouse)

data := map[string]string{
"Summary": summary,
"ReadmeContent": readmeContent,
"ResourceGuidance": resourceGuidance,
"WorkspaceContext": workspaceContext,
}

if err == nil && warehouse != nil {
97 changes: 26 additions & 71 deletions experimental/aitools/tools/explore.go
@@ -8,7 +8,6 @@ import (
"strings"

"github.com/databricks/cli/experimental/aitools/tools/prompts"
"github.com/databricks/cli/libs/databrickscfg/profile"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/exec"
"github.com/databricks/cli/libs/log"
@@ -18,23 +17,30 @@ import (
var ExploreTool = Tool{
Definition: ToolDefinition{
Name: "explore",
Description: "**REQUIRED DURING PLAN MODE** - Call this FIRST when planning ANY Databricks work. Use this to discover available workspaces, warehouses, and get workflow recommendations for your specific task. Even if you're just reading an assignment document, call this first. Especially important when task involves: creating Databricks projects/apps/pipelines/jobs, SQL pipelines or data transformation workflows, deploying code to multiple environments (dev/prod), or working with databricks.yml files. You DON'T need a workspace name - call this when starting ANY Databricks planning to understand workspace capabilities and recommended tooling before you create your plan.",
Description: "**REQUIRED DURING PLAN MODE** - Call this FIRST when planning ANY Databricks work. Discovers available workspaces, shows current workspace details (URL, warehouse, user), and provides comprehensive workflow recommendations. Even if you're just reading an assignment document, call this first. Especially important when task involves: creating Databricks projects/apps/pipelines/jobs, SQL pipelines or data transformation workflows, deploying code to multiple environments (dev/prod), or working with databricks.yml files.",
InputSchema: map[string]any{
"type": "object",
"properties": map[string]any{},
},
},
Handler: func(ctx context.Context, params map[string]any) (string, error) {
warehouse, err := GetDefaultWarehouse(ctx)
// Get workspace context via listWorkspacesWithCurrent
workspaceContext, err := listWorkspacesWithCurrent(ctx)
if err != nil {
log.Debugf(ctx, "Failed to get default warehouse (non-fatal): %v", err)
warehouse = nil
log.Debugf(ctx, "Failed to get workspace context (non-fatal): %v", err)
workspaceContext = "Unable to load workspace information. You may need to authenticate first."
}

// Get warehouse ID for SQL query examples in guidance
currentProfile := getCurrentProfile(ctx)
profiles := getAvailableProfiles(ctx)
warehouse, err := GetDefaultWarehouse(ctx, currentProfile)
warehouseID := ""
if err == nil && warehouse != nil {
warehouseID = warehouse.ID
}

return generateExploreGuidance(warehouse, currentProfile, profiles), nil
// Generate guidance with warehouse context
return generateExploreGuidance(workspaceContext, warehouseID), nil
},
}

@@ -47,13 +53,21 @@ type warehouse struct {
// GetDefaultWarehouse finds a suitable SQL warehouse for queries.
// It filters out warehouses the user cannot access and prefers RUNNING warehouses,
// then falls back to STOPPED ones (which auto-start).
func GetDefaultWarehouse(ctx context.Context) (*warehouse, error) {
// The profile parameter specifies which workspace profile to use (defaults to DEFAULT if empty).
func GetDefaultWarehouse(ctx context.Context, profile string) (*warehouse, error) {
executor, err := exec.NewCommandExecutor("")
if err != nil {
return nil, fmt.Errorf("failed to create command executor: %w", err)
}

output, err := executor.Exec(ctx, fmt.Sprintf(`"%s" api get "/api/2.0/sql/warehouses?skip_cannot_use=true" --output json`, GetCLIPath()))
// Build the CLI command with optional --profile flag
cmd := fmt.Sprintf(`"%s"`, GetCLIPath())
if profile != "" && profile != "DEFAULT" {
cmd += fmt.Sprintf(` --profile "%s"`, profile)
}
cmd += ` api get "/api/2.0/sql/warehouses?skip_cannot_use=true" --output json`

output, err := executor.Exec(ctx, cmd)
if err != nil {
return nil, fmt.Errorf("failed to list warehouses: %w\nOutput: %s", err, output)
}
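// Illustrative sketch (not part of this change): the remainder of this function is
// elided from the diff. Per the doc comment above, it parses the warehouse list and
// prefers RUNNING warehouses, falling back to STOPPED ones. Assuming the standard
// list response shape ({"warehouses": [{"id", "name", "state", ...}]}) and
// encoding/json, that selection could look roughly like:
//
//	var resp struct {
//		Warehouses []struct {
//			ID    string `json:"id"`
//			Name  string `json:"name"`
//			State string `json:"state"`
//		} `json:"warehouses"`
//	}
//	if err := json.Unmarshal([]byte(output), &resp); err != nil {
//		return nil, fmt.Errorf("failed to parse warehouse list: %w", err)
//	}
//	var stopped *warehouse
//	for _, w := range resp.Warehouses {
//		switch w.State {
//		case "RUNNING":
//			return &warehouse{ID: w.ID, Name: w.Name}, nil
//		case "STOPPED":
//			if stopped == nil {
//				stopped = &warehouse{ID: w.ID, Name: w.Name}
//			}
//		}
//	}
//	if stopped != nil {
//		return stopped, nil
//	}
//	return nil, fmt.Errorf("no usable SQL warehouse found")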
@@ -98,69 +112,10 @@ func getCurrentProfile(ctx context.Context) string {
return profileName
}

// getAvailableProfiles returns all available profiles from ~/.databrickscfg.
func getAvailableProfiles(ctx context.Context) profile.Profiles {
profiles, err := profile.DefaultProfiler.LoadProfiles(ctx, profile.MatchAllProfiles)
if err != nil {
// If we can't load profiles, return empty list (config file might not exist)
return profile.Profiles{}
}
return profiles
}

// generateExploreGuidance creates comprehensive guidance for data exploration.
func generateExploreGuidance(warehouse *warehouse, currentProfile string, profiles profile.Profiles) string {
// Build workspace/profile information
workspaceInfo := "Current Workspace Profile: " + currentProfile
if len(profiles) > 0 {
// Find current profile details
var currentHost string
for _, p := range profiles {
if p.Name == currentProfile {
currentHost = p.Host
if cloud := p.Cloud(); cloud != "" {
currentHost = fmt.Sprintf("%s (%s)", currentHost, cloud)
}
break
}
}
if currentHost != "" {
workspaceInfo = fmt.Sprintf("Current Workspace Profile: %s - %s", currentProfile, currentHost)
}
}

// Build available profiles list
profilesInfo := ""
if len(profiles) > 1 {
profilesInfo = "\n\nAvailable Workspace Profiles:\n"
for _, p := range profiles {
marker := ""
if p.Name == currentProfile {
marker = " (current)"
}
cloud := p.Cloud()
if cloud != "" {
profilesInfo += fmt.Sprintf(" - %s: %s (%s)%s\n", p.Name, p.Host, cloud, marker)
} else {
profilesInfo += fmt.Sprintf(" - %s: %s%s\n", p.Name, p.Host, marker)
}
}
profilesInfo += "\n To use a different workspace, add --profile <name> to any command:\n"
profilesInfo += " invoke_databricks_cli '--profile prod catalogs list'\n"
}

// Handle warehouse information (may be nil if lookup failed)
warehouseName := ""
warehouseID := ""
if warehouse != nil {
warehouseName = warehouse.Name
warehouseID = warehouse.ID
}

func generateExploreGuidance(workspaceContext, warehouseID string) string {
return prompts.MustExecuteTemplate("explore.tmpl", map[string]string{
"WorkspaceInfo": workspaceInfo,
"WarehouseName": warehouseName,
"WarehouseID": warehouseID,
"ProfilesInfo": profilesInfo,
"WorkspaceContext": workspaceContext,
"WarehouseID": warehouseID,
})
}
6 changes: 6 additions & 0 deletions experimental/aitools/tools/prompts/analyze_project.tmpl
@@ -14,6 +14,12 @@ Project Analysis

{{.Summary}}

Current Workspace
-----------------
{{.WorkspaceContext}}

Use workspace_info(profile='<name>') to get details about other workspaces.

Guidance for Working with this Project
--------------------------------------

99 changes: 95 additions & 4 deletions experimental/aitools/tools/prompts/explore.tmpl
@@ -10,11 +10,10 @@
Databricks Data Exploration Guide
=====================================

{{.WorkspaceInfo}}{{if .WarehouseName}}
Default SQL Warehouse: {{.WarehouseName}} ({{.WarehouseID}}){{else}}
Note: No SQL warehouse detected. SQL queries will require warehouse_id to be specified manually.{{end}}{{.ProfilesInfo}}
{{.WorkspaceContext}}

IMPORTANT: Use the invoke_databricks_cli tool to run all commands below!
Use workspace_info(profile='<name>') to get details about other workspaces.


1. EXECUTING SQL QUERIES
@@ -76,7 +75,99 @@ IMPORTANT: Use the invoke_databricks_cli tool to run all commands below!
Getting Started:
- Use the commands above to explore what resources exist in the workspace
- All commands support --output json for programmatic access
- Remember to add --profile <name> when working with non-default workspaces
- To use a different workspace: workspace_info(profile='<name>') then invoke_databricks_cli('--profile <name> <command>')

WORKFLOW PATTERNS FOR NOTEBOOKS
===============================

Create notebooks locally (.ipynb), fetch data from Databricks, generate visualizations.

## Setup (one-time)

```bash
# Install uv
curl -LsSf https://astral.sh/uv/install.sh | sh
```

**pyproject.toml** dev dependencies:
```toml
[dependency-groups]
dev = [
"databricks-connect>=15.4.0",
"papermill",
"nbformat",
"matplotlib",
]
```

```bash
# Install deps
uv sync

# Auto-init spark (no boilerplate in notebooks)
mkdir -p ~/.ipython/profile_default/startup
cat > ~/.ipython/profile_default/startup/00-databricks-spark.py << 'EOF'
try:
    from databricks.connect import DatabricksSession
    spark = DatabricksSession.builder.getOrCreate()
except: pass
EOF
```

## Workflow

**Create or update notebook:**

Add exploratory cells to an .ipynb notebook, for example:

```python
import pandas as pd, matplotlib.pyplot as plt

# Aggregate in Spark, limit for viz
df = spark.sql("""
SELECT category, COUNT(*) as count, AVG(value) as avg_value
FROM catalog.schema.table
GROUP BY category
ORDER BY count DESC
""").limit(10000).toPandas()

df.plot(x='category', y='avg_value', kind='bar', figsize=(10, 6))
plt.title('Average Value by Category')
plt.show()
```

**Execute:**

Execute the notebook and embed the results inline in the .ipynb:

```bash
DATABRICKS_CONFIG_PROFILE=<profile> DATABRICKS_SERVERLESS_COMPUTE_ID=auto \
uv run papermill notebook.ipynb notebook_executed.ipynb -k python3
```

**Iterate:**

Iterating on the notebook is MANDATORY: do not assume the first execution will succeed.

Papermill embeds all outputs (stdout, stderr, plots as base64, errors) into the executed .ipynb.

To iterate:
1. Read `notebook_executed.ipynb` to see results (outputs are in cell JSON)
2. Check for errors, review data/plots
3. Optionally, do any quick exploratory queries directly via the CLI
4. Modify `notebook.ipynb` based on results
5. Re-execute and repeat

**View in IDE:**
- `cursor notebook_executed.ipynb` or `code notebook_executed.ipynb`
- Or deploy: `databricks bundle deploy` and open in workspace browser

## Key Pattern

**Aggregate → Limit → Pandas → Visualize → Read outputs → Iterate**

Always aggregate in Spark (GROUP BY, AVG, COUNT), then `.limit(10000)` before `.toPandas()`. Execute with papermill, read the executed .ipynb to see results, iterate.



WORKFLOW PATTERNS FOR DATABRICKS PROJECTS