diff --git a/mcp/src/client.ts b/mcp/src/client.ts index 876d533c..e8ebbf16 100644 --- a/mcp/src/client.ts +++ b/mcp/src/client.ts @@ -18,47 +18,75 @@ export interface PaginationParams { size?: number; } +export interface SpiderTemplateParams { + project_name?: string; + spider_name?: string; + start_urls?: string; + allowed_domains?: string; +} + export interface Spider { _id: string; name: string; + col_id?: string; + col_name?: string; + db_name?: string; description?: string; + database_id?: string; + project_id?: string; + mode?: string; // random, all, selected-nodes + node_ids?: string[]; + git_id?: string; + git_root_path?: string; + template?: string; + template_params?: SpiderTemplateParams; cmd: string; param?: string; - project_id?: string; - type?: string; - tags?: string[]; - created_ts?: Date; - updated_ts?: Date; + priority?: number; // 1-10, default 5 + created_at?: Date; + updated_at?: Date; + created_by?: string; + updated_by?: string; } export interface Task { _id: string; spider_id: string; - spider_name?: string; + status: string; // pending, assigned, running, finished, error, cancelled, abnormal + node_id?: string; cmd: string; param?: string; - priority?: number; - status: string; - log_path?: string; - result_count?: number; error?: string; - start_ts?: Date; - end_ts?: Date; - created_ts?: Date; - updated_ts?: Date; + pid?: number; + schedule_id?: string; + mode?: string; + priority?: number; + node_ids?: string[]; + created_at?: Date; + updated_at?: Date; + created_by?: string; + updated_by?: string; } export interface Node { _id: string; + key?: string; name: string; - description?: string; ip: string; mac: string; hostname: string; - status: string; + description?: string; is_master: boolean; - created_ts?: Date; - updated_ts?: Date; + status: string; + enabled?: boolean; + active?: boolean; + active_at?: Date; + current_runners?: number; + max_runners?: number; + created_at?: Date; + updated_at?: Date; + created_by?: string; + updated_by?: string; } export interface Schedule { @@ -66,13 +94,91 @@ export interface Schedule { name: string; description?: string; spider_id: string; - spider_name?: string; cron: string; + entry_id?: number; // cron entry ID cmd?: string; param?: string; + mode?: string; + node_ids?: string[]; + priority?: number; enabled: boolean; - created_ts?: Date; - updated_ts?: Date; + created_at?: Date; + updated_at?: Date; + created_by?: string; + updated_by?: string; +} + +export interface Project { + _id: string; + name: string; + description?: string; + created_at?: Date; + updated_at?: Date; + created_by?: string; + updated_by?: string; +} + +export interface Database { + _id: string; + name: string; + description?: string; + data_source: string; + host: string; + port: number; + uri?: string; + database?: string; + username?: string; + status: string; + error?: string; + active: boolean; + active_at?: Date; + is_default?: boolean; + created_at?: Date; + updated_at?: Date; + created_by?: string; + updated_by?: string; +} + +export interface Git { + _id: string; + url: string; + name: string; + auth_type?: string; + username?: string; + current_branch?: string; + status: string; + error?: string; + created_at?: Date; + updated_at?: Date; + created_by?: string; + updated_by?: string; +} + +export interface SpiderStat { + _id: string; + last_task_id?: string; + tasks: number; + results: number; + wait_duration?: number; // in seconds + runtime_duration?: number; // in seconds + total_duration?: number; // in seconds + average_wait_duration?: number; + average_runtime_duration?: number; + average_total_duration?: number; + created_at?: Date; + updated_at?: Date; +} + +export interface TaskStat { + _id: string; + started_at?: Date; + ended_at?: Date; + wait_duration?: number; // in milliseconds + runtime_duration?: number; // in milliseconds + total_duration?: number; // in milliseconds + result_count: number; + created_at?: Date; + updated_at?: Date; } export class CrawlabClient { @@ -136,6 +242,8 @@ export class CrawlabClient { cmd?: string; param?: string; priority?: number; + mode?: string; + node_ids?: string[]; } ): Promise> { const response = await this.client.post(`/spiders/${id}/run`, params); @@ -225,6 +333,21 @@ export class CrawlabClient { return response.data; } + async updateNode(id: string, node: Partial): Promise> { + const response = await this.client.put(`/nodes/${id}`, node); + return response.data; + } + + async enableNode(id: string): Promise> { + const response = await this.client.post(`/nodes/${id}/enable`); + return response.data; + } + + async disableNode(id: string): Promise> { + const response = await this.client.post(`/nodes/${id}/disable`); + return response.data; + } + // Schedules async getSchedules( params?: PaginationParams @@ -268,6 +391,110 @@ export class CrawlabClient { return response.data; } + // Projects + async getProjects(params?: PaginationParams): Promise> { + const response = await this.client.get('/projects', { params }); + return response.data; + } + + async getProject(id: string): Promise> { + const response = await this.client.get(`/projects/${id}`); + return response.data; + } + + async createProject(project: Partial): Promise> { + const response = await this.client.post('/projects', project); + return response.data; + } + + async updateProject(id: string, project: Partial): Promise> { + const response = await this.client.put(`/projects/${id}`, project); + return response.data; + } + + async deleteProject(id: string): Promise> { + const response = await this.client.delete(`/projects/${id}`); + return response.data; + } + + // Databases + async getDatabases(params?: PaginationParams): Promise> { + const response = await this.client.get('/databases', { params }); + return response.data; + } + + async getDatabase(id: string): Promise> { + const response = await this.client.get(`/databases/${id}`); + return response.data; + } + + async createDatabase(database: Partial): Promise> { + const response = await this.client.post('/databases', database); + return response.data; + } + + async updateDatabase(id: string, database: Partial): Promise> { + const response = await this.client.put(`/databases/${id}`, database); + return response.data; + } + + async deleteDatabase(id: string): Promise> { + const response = await this.client.delete(`/databases/${id}`); + return response.data; + } + + async testDatabaseConnection(id: string): Promise> { + const response = await this.client.post(`/databases/${id}/test`); + return response.data; + } + + // Git repositories + async getGitRepos(params?: PaginationParams): Promise> { + const response = await this.client.get('/gits', { params }); + return response.data; + } + + async getGitRepo(id: string): Promise> { + const response = await this.client.get(`/gits/${id}`); + return response.data; + } + + async createGitRepo(git: Partial): Promise> { + const response = await this.client.post('/gits', git); + return response.data; + } + + async updateGitRepo(id: string, git: Partial): Promise> { + const response = await this.client.put(`/gits/${id}`, git); + return response.data; + } + + async deleteGitRepo(id: string): Promise> { + const response = await this.client.delete(`/gits/${id}`); + return response.data; + } + + async pullGitRepo(id: string): Promise> { + const response = await this.client.post(`/gits/${id}/pull`); + return response.data; + } + + async cloneGitRepo(id: string): Promise> { + const response = await this.client.post(`/gits/${id}/clone`); + return response.data; + } + + // Statistics + async getSpiderStats(id: string): Promise> { + const response = await this.client.get(`/spiders/${id}/stats`); + return response.data; + } + + async getTaskStats(id: string): Promise> { + const response = await this.client.get(`/tasks/${id}/stats`); + return response.data; + } + // Health check async healthCheck(): Promise { try { diff --git a/mcp/src/tools.ts b/mcp/src/tools.ts index a821ed7c..dba2eccb 100644 --- a/mcp/src/tools.ts +++ b/mcp/src/tools.ts @@ -6,6 +6,10 @@ import { configureTaskTools } from "./tools/tasks.js"; import { configureNodeTools } from "./tools/nodes.js"; import { configureScheduleTools } from "./tools/schedules.js"; import { configureSystemTools } from "./tools/system.js"; +import { configureProjectTools } from "./tools/projects.js"; +import { configureDatabaseTools } from "./tools/databases.js"; +import { configureGitTools } from "./tools/git.js"; +import { configureStatsTools } from "./tools/stats.js"; export function configureAllTools(server: McpServer, client: CrawlabClient) { configureSpiderTools(server, client); @@ -13,4 +17,8 @@ export function configureAllTools(server: McpServer, client: CrawlabClient) { configureNodeTools(server, client); configureScheduleTools(server, client); configureSystemTools(server, client); + configureProjectTools(server, client); + configureDatabaseTools(server, client); + configureGitTools(server, client); + configureStatsTools(server, client); } diff --git a/mcp/src/tools/databases.ts b/mcp/src/tools/databases.ts new file mode 100644 index 00000000..623d4b23 --- /dev/null +++ b/mcp/src/tools/databases.ts @@ -0,0 +1,242 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { CrawlabClient } from "../client.js"; +import { z } from "zod"; + +const DATABASE_TOOLS = { + list_databases: "crawlab_list_databases", + get_database: "crawlab_get_database", + create_database: "crawlab_create_database", + update_database: "crawlab_update_database", + delete_database: "crawlab_delete_database", + test_database_connection: "crawlab_test_database_connection", +}; + +export function configureDatabaseTools(server: McpServer, client: CrawlabClient) { + server.tool( + DATABASE_TOOLS.list_databases, + "List all databases in Crawlab", + { + page: z.number().optional().describe("Page number for pagination (default: 1)"), + size: z.number().optional().describe("Number of databases per page (default: 10)"), + }, + async ({ page, size }) => { + try { + const response = await client.getDatabases({ page, size }); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error listing databases: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + DATABASE_TOOLS.get_database, + "Get details of a specific database", + { + database_id: z.string().describe("The ID of the database to retrieve"), + }, + async ({ database_id }) => { + try { + const response = await client.getDatabase(database_id); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error getting database: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + DATABASE_TOOLS.create_database, + "Create a new database connection", + { + name: z.string().describe("Name of the database connection"), + description: z.string().optional().describe("Description of the database"), + data_source: z.string().describe("Data source type (mongo, mysql, postgres, etc.)"), + host: z.string().describe("Database host"), + port: z.number().describe("Database port"), + database: z.string().optional().describe("Database name"), + username: z.string().optional().describe("Database username"), + password: z.string().optional().describe("Database password"), + uri: z.string().optional().describe("Database URI (alternative to individual connection params)"), + }, + async ({ name, description, data_source, host, port, database, username, password, uri }) => { + try { + const databaseData = { + name, + description, + data_source, + host, + port, + database, + username, + password, + uri, + }; + const response = await client.createDatabase(databaseData); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error creating database: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + DATABASE_TOOLS.update_database, + "Update an existing database connection", + { + database_id: z.string().describe("The ID of the database to update"), + name: z.string().optional().describe("New name for the database"), + description: z.string().optional().describe("New description for the database"), + data_source: z.string().optional().describe("New data source type"), + host: z.string().optional().describe("New database host"), + port: z.number().optional().describe("New database port"), + database: z.string().optional().describe("New database name"), + username: z.string().optional().describe("New database username"), + password: z.string().optional().describe("New database password"), + uri: z.string().optional().describe("New database URI"), + }, + async ({ database_id, name, description, data_source, host, port, database, username, password, uri }) => { + try { + const updateData = { + ...(name && { name }), + ...(description && { description }), + ...(data_source && { data_source }), + ...(host && { host }), + ...(port && { port }), + ...(database && { database }), + ...(username && { username }), + ...(password && { password }), + ...(uri && { uri }), + }; + const response = await client.updateDatabase(database_id, updateData); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error updating database: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + DATABASE_TOOLS.delete_database, + "Delete a database connection", + { + database_id: z.string().describe("The ID of the database to delete"), + }, + async ({ database_id }) => { + try { + const response = await client.deleteDatabase(database_id); + return { + content: [ + { + type: "text", + text: `Database ${database_id} deleted successfully.`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error deleting database: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + DATABASE_TOOLS.test_database_connection, + "Test a database connection", + { + database_id: z.string().describe("The ID of the database to test"), + }, + async ({ database_id }) => { + try { + const response = await client.testDatabaseConnection(database_id); + return { + content: [ + { + type: "text", + text: `Database connection test result: ${JSON.stringify(response, null, 2)}`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error testing database connection: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); +} + +export { DATABASE_TOOLS }; diff --git a/mcp/src/tools/git.ts b/mcp/src/tools/git.ts new file mode 100644 index 00000000..f57bd5b8 --- /dev/null +++ b/mcp/src/tools/git.ts @@ -0,0 +1,258 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { CrawlabClient } from "../client.js"; +import { z } from "zod"; + +const GIT_TOOLS = { + list_git_repos: "crawlab_list_git_repos", + get_git_repo: "crawlab_get_git_repo", + create_git_repo: "crawlab_create_git_repo", + update_git_repo: "crawlab_update_git_repo", + delete_git_repo: "crawlab_delete_git_repo", + clone_git_repo: "crawlab_clone_git_repo", + pull_git_repo: "crawlab_pull_git_repo", +}; + +export function configureGitTools(server: McpServer, client: CrawlabClient) { + server.tool( + GIT_TOOLS.list_git_repos, + "List all Git repositories in Crawlab", + { + page: z.number().optional().describe("Page number for pagination (default: 1)"), + size: z.number().optional().describe("Number of repositories per page (default: 10)"), + }, + async ({ page, size }) => { + try { + const response = await client.getGitRepos({ page, size }); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error listing Git repositories: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + GIT_TOOLS.get_git_repo, + "Get details of a specific Git repository", + { + git_id: z.string().describe("The ID of the Git repository to retrieve"), + }, + async ({ git_id }) => { + try { + const response = await client.getGitRepo(git_id); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error getting Git repository: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + GIT_TOOLS.create_git_repo, + "Create a new Git repository connection", + { + name: z.string().describe("Name of the Git repository"), + url: z.string().describe("Git repository URL"), + auth_type: z.enum(["public", "private", "token"]).optional().describe("Authentication type"), + username: z.string().optional().describe("Git username (for private repos)"), + password: z.string().optional().describe("Git password or token"), + }, + async ({ name, url, auth_type, username, password }) => { + try { + const gitData = { + name, + url, + auth_type, + username, + password, + }; + const response = await client.createGitRepo(gitData); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error creating Git repository: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + GIT_TOOLS.update_git_repo, + "Update an existing Git repository connection", + { + git_id: z.string().describe("The ID of the Git repository to update"), + name: z.string().optional().describe("New name for the repository"), + url: z.string().optional().describe("New Git repository URL"), + auth_type: z.enum(["public", "private", "token"]).optional().describe("New authentication type"), + username: z.string().optional().describe("New Git username"), + password: z.string().optional().describe("New Git password or token"), + }, + async ({ git_id, name, url, auth_type, username, password }) => { + try { + const updateData = { + ...(name && { name }), + ...(url && { url }), + ...(auth_type && { auth_type }), + ...(username && { username }), + ...(password && { password }), + }; + const response = await client.updateGitRepo(git_id, updateData); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error updating Git repository: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + GIT_TOOLS.delete_git_repo, + "Delete a Git repository connection", + { + git_id: z.string().describe("The ID of the Git repository to delete"), + }, + async ({ git_id }) => { + try { + const response = await client.deleteGitRepo(git_id); + return { + content: [ + { + type: "text", + text: `Git repository ${git_id} deleted successfully.`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error deleting Git repository: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + GIT_TOOLS.clone_git_repo, + "Clone a Git repository", + { + git_id: z.string().describe("The ID of the Git repository to clone"), + }, + async ({ git_id }) => { + try { + const response = await client.cloneGitRepo(git_id); + return { + content: [ + { + type: "text", + text: `Git repository ${git_id} cloned successfully.`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error cloning Git repository: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + GIT_TOOLS.pull_git_repo, + "Pull latest changes from a Git repository", + { + git_id: z.string().describe("The ID of the Git repository to pull"), + }, + async ({ git_id }) => { + try { + const response = await client.pullGitRepo(git_id); + return { + content: [ + { + type: "text", + text: `Git repository ${git_id} pulled successfully.`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error pulling Git repository: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); +} + +export { GIT_TOOLS }; diff --git a/mcp/src/tools/nodes.ts b/mcp/src/tools/nodes.ts index 415849f1..3afef79f 100644 --- a/mcp/src/tools/nodes.ts +++ b/mcp/src/tools/nodes.ts @@ -5,6 +5,9 @@ import { z } from "zod"; const NODE_TOOLS = { list_nodes: "crawlab_list_nodes", get_node: "crawlab_get_node", + update_node: "crawlab_update_node", + enable_node: "crawlab_enable_node", + disable_node: "crawlab_disable_node", }; export function configureNodeTools(server: McpServer, client: CrawlabClient) { @@ -70,6 +73,109 @@ export function configureNodeTools(server: McpServer, client: CrawlabClient) { } } ); + + server.tool( + NODE_TOOLS.update_node, + "Update a node configuration", + { + node_id: z.string().describe("The ID of the node to update"), + name: z.string().optional().describe("New name for the node"), + description: z.string().optional().describe("New description for the node"), + max_runners: z.number().optional().describe("New maximum number of concurrent runners"), + enabled: z.boolean().optional().describe("Whether the node is enabled"), + }, + async ({ node_id, name, description, max_runners, enabled }) => { + try { + const updateData = { + ...(name && { name }), + ...(description && { description }), + ...(max_runners && { max_runners }), + ...(enabled !== undefined && { enabled }), + }; + const response = await client.updateNode(node_id, updateData); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error updating node: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + NODE_TOOLS.enable_node, + "Enable a node", + { + node_id: z.string().describe("The ID of the node to enable"), + }, + async ({ node_id }) => { + try { + const response = await client.enableNode(node_id); + return { + content: [ + { + type: "text", + text: `Node ${node_id} enabled successfully.`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error enabling node: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + NODE_TOOLS.disable_node, + "Disable a node", + { + node_id: z.string().describe("The ID of the node to disable"), + }, + async ({ node_id }) => { + try { + const response = await client.disableNode(node_id); + return { + content: [ + { + type: "text", + text: `Node ${node_id} disabled successfully.`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error disabling node: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); } export { NODE_TOOLS }; diff --git a/mcp/src/tools/projects.ts b/mcp/src/tools/projects.ts new file mode 100644 index 00000000..b9c0e549 --- /dev/null +++ b/mcp/src/tools/projects.ts @@ -0,0 +1,182 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { CrawlabClient } from "../client.js"; +import { z } from "zod"; + +const PROJECT_TOOLS = { + list_projects: "crawlab_list_projects", + get_project: "crawlab_get_project", + create_project: "crawlab_create_project", + update_project: "crawlab_update_project", + delete_project: "crawlab_delete_project", +}; + +export function configureProjectTools(server: McpServer, client: CrawlabClient) { + server.tool( + PROJECT_TOOLS.list_projects, + "List all projects in Crawlab", + { + page: z.number().optional().describe("Page number for pagination (default: 1)"), + size: z.number().optional().describe("Number of projects per page (default: 10)"), + }, + async ({ page, size }) => { + try { + const response = await client.getProjects({ page, size }); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error listing projects: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + PROJECT_TOOLS.get_project, + "Get details of a specific project", + { + project_id: z.string().describe("The ID of the project to retrieve"), + }, + async ({ project_id }) => { + try { + const response = await client.getProject(project_id); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error getting project: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + PROJECT_TOOLS.create_project, + "Create a new project", + { + name: z.string().describe("Name of the project"), + description: z.string().optional().describe("Description of the project"), + }, + async ({ name, description }) => { + try { + const projectData = { + name, + description, + }; + const response = await client.createProject(projectData); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error creating project: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + PROJECT_TOOLS.update_project, + "Update an existing project", + { + project_id: z.string().describe("The ID of the project to update"), + name: z.string().optional().describe("New name for the project"), + description: z.string().optional().describe("New description for the project"), + }, + async ({ project_id, name, description }) => { + try { + const updateData = { + ...(name && { name }), + ...(description && { description }), + }; + const response = await client.updateProject(project_id, updateData); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error updating project: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + PROJECT_TOOLS.delete_project, + "Delete a project", + { + project_id: z.string().describe("The ID of the project to delete"), + }, + async ({ project_id }) => { + try { + const response = await client.deleteProject(project_id); + return { + content: [ + { + type: "text", + text: `Project ${project_id} deleted successfully.`, + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error deleting project: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); +} + +export { PROJECT_TOOLS }; diff --git a/mcp/src/tools/schedules.ts b/mcp/src/tools/schedules.ts index a168ac0a..c793e742 100644 --- a/mcp/src/tools/schedules.ts +++ b/mcp/src/tools/schedules.ts @@ -86,9 +86,12 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient) cron: z.string().describe("Cron expression for the schedule (e.g., '0 0 * * *' for daily at midnight)"), cmd: z.string().optional().describe("Command to override for scheduled runs"), param: z.string().optional().describe("Parameters to override for scheduled runs"), + mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("Task execution mode"), + node_ids: z.array(z.string()).optional().describe("Node IDs for selected-nodes mode"), + priority: z.number().min(1).max(10).optional().describe("Task priority (1-10)"), enabled: z.boolean().optional().describe("Whether the schedule is enabled (default: true)"), }, - async ({ name, description, spider_id, cron, cmd, param, enabled = true }) => { + async ({ name, description, spider_id, cron, cmd, param, mode, node_ids, priority, enabled = true }) => { try { const scheduleData = { name, @@ -97,6 +100,9 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient) cron, cmd, param, + mode, + node_ids, + priority, enabled, }; const response = await client.createSchedule(scheduleData); @@ -133,9 +139,12 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient) cron: z.string().optional().describe("New cron expression for the schedule"), cmd: z.string().optional().describe("New command for the schedule"), param: z.string().optional().describe("New parameters for the schedule"), + mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("New task execution mode"), + node_ids: z.array(z.string()).optional().describe("New node IDs for selected-nodes mode"), + priority: z.number().min(1).max(10).optional().describe("New task priority (1-10)"), enabled: z.boolean().optional().describe("Whether the schedule is enabled"), }, - async ({ schedule_id, name, description, spider_id, cron, cmd, param, enabled }) => { + async ({ schedule_id, name, description, spider_id, cron, cmd, param, mode, node_ids, priority, enabled }) => { try { const updateData = { ...(name && { name }), @@ -144,6 +153,9 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient) ...(cron && { cron }), ...(cmd && { cmd }), ...(param && { param }), + ...(mode && { mode }), + ...(node_ids && { node_ids }), + ...(priority && { priority }), ...(enabled !== undefined && { enabled }), }; const response = await client.updateSchedule(schedule_id, updateData); diff --git a/mcp/src/tools/spiders.ts b/mcp/src/tools/spiders.ts index c87ad393..4f45d0d7 100644 --- a/mcp/src/tools/spiders.ts +++ b/mcp/src/tools/spiders.ts @@ -87,10 +87,17 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) { cmd: z.string().describe("Command to execute the spider"), param: z.string().optional().describe("Parameters for the spider command"), project_id: z.string().optional().describe("Project ID to associate with the spider"), - type: z.string().optional().describe("Type of spider (e.g., 'scrapy', 'selenium', 'custom')"), - tags: z.array(z.string()).optional().describe("Tags for categorizing the spider"), + database_id: z.string().optional().describe("Database ID for data storage"), + col_name: z.string().optional().describe("Collection/table name for results"), + db_name: z.string().optional().describe("Database name for results"), + mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("Task execution mode"), + node_ids: z.array(z.string()).optional().describe("Node IDs for selected-nodes mode"), + git_id: z.string().optional().describe("Git repository ID"), + git_root_path: z.string().optional().describe("Git root path"), + template: z.string().optional().describe("Spider template"), + priority: z.number().min(1).max(10).optional().describe("Priority (1-10, default: 5)"), }, - async ({ name, description, cmd, param, project_id, type, tags }) => { + async ({ name, description, cmd, param, project_id, database_id, col_name, db_name, mode, node_ids, git_id, git_root_path, template, priority }) => { try { const spiderData = { name, @@ -98,8 +105,15 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) { cmd, param, project_id, - type, - tags, + database_id, + col_name, + db_name, + mode, + node_ids, + git_id, + git_root_path, + template, + priority: priority || 5, }; const response = await client.createSpider(spiderData); return { @@ -134,10 +148,17 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) { cmd: z.string().optional().describe("New command to execute the spider"), param: z.string().optional().describe("New parameters for the spider command"), project_id: z.string().optional().describe("New project ID to associate with the spider"), - type: z.string().optional().describe("New type of spider"), - tags: z.array(z.string()).optional().describe("New tags for the spider"), + database_id: z.string().optional().describe("New database ID for data storage"), + col_name: z.string().optional().describe("New collection/table name for results"), + db_name: z.string().optional().describe("New database name for results"), + mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("New task execution mode"), + node_ids: z.array(z.string()).optional().describe("New node IDs for selected-nodes mode"), + git_id: z.string().optional().describe("New git repository ID"), + git_root_path: z.string().optional().describe("New git root path"), + template: z.string().optional().describe("New spider template"), + priority: z.number().min(1).max(10).optional().describe("New priority (1-10)"), }, - async ({ spider_id, name, description, cmd, param, project_id, type, tags }) => { + async ({ spider_id, name, description, cmd, param, project_id, database_id, col_name, db_name, mode, node_ids, git_id, git_root_path, template, priority }) => { try { const updateData = { ...(name && { name }), @@ -145,8 +166,15 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) { ...(cmd && { cmd }), ...(param && { param }), ...(project_id && { project_id }), - ...(type && { type }), - ...(tags && { tags }), + ...(database_id && { database_id }), + ...(col_name && { col_name }), + ...(db_name && { db_name }), + ...(mode && { mode }), + ...(node_ids && { node_ids }), + ...(git_id && { git_id }), + ...(git_root_path && { git_root_path }), + ...(template && { template }), + ...(priority && { priority }), }; const response = await client.updateSpider(spider_id, updateData); return { @@ -209,14 +237,18 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) { spider_id: z.string().describe("The ID of the spider to run"), cmd: z.string().optional().describe("Override command for this run"), param: z.string().optional().describe("Override parameters for this run"), - priority: z.number().optional().describe("Priority of the task (1-10, higher = more priority)"), + priority: z.number().min(1).max(10).optional().describe("Task priority (1-10)"), + mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("Task execution mode"), + node_ids: z.array(z.string()).optional().describe("Node IDs for selected-nodes mode"), }, - async ({ spider_id, cmd, param, priority }) => { + async ({ spider_id, cmd, param, priority, mode, node_ids }) => { try { const runData = { ...(cmd && { cmd }), ...(param && { param }), ...(priority && { priority }), + ...(mode && { mode }), + ...(node_ids && { node_ids }), }; const response = await client.runSpider(spider_id, runData); return { diff --git a/mcp/src/tools/stats.ts b/mcp/src/tools/stats.ts new file mode 100644 index 00000000..aa6bc21f --- /dev/null +++ b/mcp/src/tools/stats.ts @@ -0,0 +1,74 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { CrawlabClient } from "../client.js"; +import { z } from "zod"; + +const STATS_TOOLS = { + get_spider_stats: "crawlab_get_spider_stats", + get_task_stats: "crawlab_get_task_stats", +}; + +export function configureStatsTools(server: McpServer, client: CrawlabClient) { + server.tool( + STATS_TOOLS.get_spider_stats, + "Get statistics for a specific spider", + { + spider_id: z.string().describe("The ID of the spider to get statistics for"), + }, + async ({ spider_id }) => { + try { + const response = await client.getSpiderStats(spider_id); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error getting spider statistics: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); + + server.tool( + STATS_TOOLS.get_task_stats, + "Get statistics for a specific task", + { + task_id: z.string().describe("The ID of the task to get statistics for"), + }, + async ({ task_id }) => { + try { + const response = await client.getTaskStats(task_id); + return { + content: [ + { + type: "text", + text: JSON.stringify(response, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error getting task statistics: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); +} + +export { STATS_TOOLS }; diff --git a/mcp/src/tools/system.ts b/mcp/src/tools/system.ts index 9bf4d611..d5625bf1 100644 --- a/mcp/src/tools/system.ts +++ b/mcp/src/tools/system.ts @@ -4,6 +4,7 @@ import { CrawlabClient } from "../client.js"; const SYSTEM_TOOLS = { health_check: "crawlab_health_check", system_status: "crawlab_system_status", + get_system_overview: "crawlab_get_system_overview", }; export function configureSystemTools(server: McpServer, client: CrawlabClient) { @@ -64,8 +65,11 @@ export function configureSystemTools(server: McpServer, client: CrawlabClient) { total_recent: tasksResponse.total || 0, running: tasksResponse.data?.filter(task => task.status === 'running').length || 0, pending: tasksResponse.data?.filter(task => task.status === 'pending').length || 0, - completed: tasksResponse.data?.filter(task => task.status === 'success').length || 0, - failed: tasksResponse.data?.filter(task => task.status === 'error').length || 0, + assigned: tasksResponse.data?.filter(task => task.status === 'assigned').length || 0, + finished: tasksResponse.data?.filter(task => task.status === 'finished').length || 0, + error: tasksResponse.data?.filter(task => task.status === 'error').length || 0, + cancelled: tasksResponse.data?.filter(task => task.status === 'cancelled').length || 0, + abnormal: tasksResponse.data?.filter(task => task.status === 'abnormal').length || 0, }, schedules: { total: schedulesResponse.total || 0, @@ -96,6 +100,78 @@ export function configureSystemTools(server: McpServer, client: CrawlabClient) { } } ); + + server.tool( + SYSTEM_TOOLS.get_system_overview, + "Get a high-level overview of the Crawlab system including projects, spiders, nodes, and activity", + {}, + async () => { + try { + // Get comprehensive system data + const [projectsResponse, spidersResponse, nodesResponse, tasksResponse, schedulesResponse] = await Promise.all([ + client.getProjects({ page: 1, size: 100 }), + client.getSpiders({ page: 1, size: 100 }), + client.getNodes({ page: 1, size: 50 }), + client.getTasks({ page: 1, size: 50 }), + client.getSchedules({ page: 1, size: 100 }), + ]); + + const overview = { + timestamp: new Date().toISOString(), + system_health: await client.healthCheck(), + summary: { + projects: projectsResponse.total || 0, + spiders: spidersResponse.total || 0, + nodes: nodesResponse.total || 0, + recent_tasks: tasksResponse.total || 0, + schedules: schedulesResponse.total || 0, + }, + nodes_detail: { + total: nodesResponse.total || 0, + master_nodes: nodesResponse.data?.filter(node => node.is_master).length || 0, + worker_nodes: nodesResponse.data?.filter(node => !node.is_master).length || 0, + active: nodesResponse.data?.filter(node => node.active).length || 0, + enabled: nodesResponse.data?.filter(node => node.enabled).length || 0, + }, + tasks_detail: { + by_status: { + pending: tasksResponse.data?.filter(task => task.status === 'pending').length || 0, + assigned: tasksResponse.data?.filter(task => task.status === 'assigned').length || 0, + running: tasksResponse.data?.filter(task => task.status === 'running').length || 0, + finished: tasksResponse.data?.filter(task => task.status === 'finished').length || 0, + error: tasksResponse.data?.filter(task => task.status === 'error').length || 0, + cancelled: tasksResponse.data?.filter(task => task.status === 'cancelled').length || 0, + abnormal: tasksResponse.data?.filter(task => task.status === 'abnormal').length || 0, + } + }, + schedules_detail: { + total: schedulesResponse.total || 0, + enabled: schedulesResponse.data?.filter(schedule => schedule.enabled).length || 0, + disabled: schedulesResponse.data?.filter(schedule => !schedule.enabled).length || 0, + }, + }; + + return { + content: [ + { + type: "text", + text: JSON.stringify(overview, null, 2), + }, + ], + }; + } catch (error) { + return { + content: [ + { + type: "text", + text: `Error getting system overview: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + } + ); } export { SYSTEM_TOOLS }; diff --git a/mcp/src/tools/tasks.ts b/mcp/src/tools/tasks.ts index 130fc7b6..51099410 100644 --- a/mcp/src/tools/tasks.ts +++ b/mcp/src/tools/tasks.ts @@ -20,7 +20,7 @@ export function configureTaskTools(server: McpServer, client: CrawlabClient) { page: z.number().optional().describe("Page number for pagination (default: 1)"), size: z.number().optional().describe("Number of tasks per page (default: 10)"), spider_id: z.string().optional().describe("Filter by spider ID"), - status: z.string().optional().describe("Filter by task status (pending, running, success, error, cancelled)"), + status: z.string().optional().describe("Filter by task status (pending, assigned, running, finished, error, cancelled, abnormal)"), }, async ({ page, size, spider_id, status }) => { try {