feat: Add database and git tools, enhance node and spider configurations

- Implemented database tools for CRUD operations and connection testing.
- Added git tools for managing repositories including create, update, delete, clone, and pull functionalities.
- Enhanced node tools with update, enable, and disable functionalities.
- Updated spider tools to include new parameters for database integration, git repository association, and task execution modes.
- Improved schedule tools with additional parameters for task execution modes and priority settings.
- Introduced system overview tool for comprehensive system health and status reporting.
- Updated task filtering to include new task statuses.
This commit is contained in:
Marvin Zhang
2025-06-19 15:52:09 +08:00
parent c29e21deec
commit 3976b93e66
11 changed files with 1255 additions and 38 deletions

View File

@@ -18,47 +18,75 @@ export interface PaginationParams {
size?: number;
}
export interface SpiderTemplateParams {
project_name?: string;
spider_name?: string;
start_urls?: string;
allowed_domains?: string;
}
export interface Spider {
_id: string;
name: string;
col_id?: string;
col_name?: string;
db_name?: string;
description?: string;
database_id?: string;
project_id?: string;
mode?: string; // random, all, selected-nodes
node_ids?: string[];
git_id?: string;
git_root_path?: string;
template?: string;
template_params?: SpiderTemplateParams;
cmd: string;
param?: string;
project_id?: string;
type?: string;
tags?: string[];
created_ts?: Date;
updated_ts?: Date;
priority?: number; // 1-10, default 5
created_at?: Date;
updated_at?: Date;
created_by?: string;
updated_by?: string;
}
export interface Task {
_id: string;
spider_id: string;
spider_name?: string;
status: string; // pending, assigned, running, finished, error, cancelled, abnormal
node_id?: string;
cmd: string;
param?: string;
priority?: number;
status: string;
log_path?: string;
result_count?: number;
error?: string;
start_ts?: Date;
end_ts?: Date;
created_ts?: Date;
updated_ts?: Date;
pid?: number;
schedule_id?: string;
mode?: string;
priority?: number;
node_ids?: string[];
created_at?: Date;
updated_at?: Date;
created_by?: string;
updated_by?: string;
}
export interface Node {
_id: string;
key?: string;
name: string;
description?: string;
ip: string;
mac: string;
hostname: string;
status: string;
description?: string;
is_master: boolean;
created_ts?: Date;
updated_ts?: Date;
status: string;
enabled?: boolean;
active?: boolean;
active_at?: Date;
current_runners?: number;
max_runners?: number;
created_at?: Date;
updated_at?: Date;
created_by?: string;
updated_by?: string;
}
export interface Schedule {
@@ -66,13 +94,91 @@ export interface Schedule {
name: string;
description?: string;
spider_id: string;
spider_name?: string;
cron: string;
entry_id?: number; // cron entry ID
cmd?: string;
param?: string;
mode?: string;
node_ids?: string[];
priority?: number;
enabled: boolean;
created_ts?: Date;
updated_ts?: Date;
created_at?: Date;
updated_at?: Date;
created_by?: string;
updated_by?: string;
}
export interface Project {
_id: string;
name: string;
description?: string;
created_at?: Date;
updated_at?: Date;
created_by?: string;
updated_by?: string;
}
export interface Database {
_id: string;
name: string;
description?: string;
data_source: string;
host: string;
port: number;
uri?: string;
database?: string;
username?: string;
status: string;
error?: string;
active: boolean;
active_at?: Date;
is_default?: boolean;
created_at?: Date;
updated_at?: Date;
created_by?: string;
updated_by?: string;
}
export interface Git {
_id: string;
url: string;
name: string;
auth_type?: string;
username?: string;
current_branch?: string;
status: string;
error?: string;
created_at?: Date;
updated_at?: Date;
created_by?: string;
updated_by?: string;
}
export interface SpiderStat {
_id: string;
last_task_id?: string;
tasks: number;
results: number;
wait_duration?: number; // in seconds
runtime_duration?: number; // in seconds
total_duration?: number; // in seconds
average_wait_duration?: number;
average_runtime_duration?: number;
average_total_duration?: number;
created_at?: Date;
updated_at?: Date;
}
export interface TaskStat {
_id: string;
started_at?: Date;
ended_at?: Date;
wait_duration?: number; // in milliseconds
runtime_duration?: number; // in milliseconds
total_duration?: number; // in milliseconds
result_count: number;
created_at?: Date;
updated_at?: Date;
}
export class CrawlabClient {
@@ -136,6 +242,8 @@ export class CrawlabClient {
cmd?: string;
param?: string;
priority?: number;
mode?: string;
node_ids?: string[];
}
): Promise<ApiResponse<string[]>> {
const response = await this.client.post(`/spiders/${id}/run`, params);
@@ -225,6 +333,21 @@ export class CrawlabClient {
return response.data;
}
async updateNode(id: string, node: Partial<Node>): Promise<ApiResponse<Node>> {
const response = await this.client.put(`/nodes/${id}`, node);
return response.data;
}
async enableNode(id: string): Promise<ApiResponse<void>> {
const response = await this.client.post(`/nodes/${id}/enable`);
return response.data;
}
async disableNode(id: string): Promise<ApiResponse<void>> {
const response = await this.client.post(`/nodes/${id}/disable`);
return response.data;
}
// Schedules
async getSchedules(
params?: PaginationParams
@@ -268,6 +391,110 @@ export class CrawlabClient {
return response.data;
}
// Projects
async getProjects(params?: PaginationParams): Promise<ApiResponse<Project[]>> {
const response = await this.client.get('/projects', { params });
return response.data;
}
async getProject(id: string): Promise<ApiResponse<Project>> {
const response = await this.client.get(`/projects/${id}`);
return response.data;
}
async createProject(project: Partial<Project>): Promise<ApiResponse<Project>> {
const response = await this.client.post('/projects', project);
return response.data;
}
async updateProject(id: string, project: Partial<Project>): Promise<ApiResponse<Project>> {
const response = await this.client.put(`/projects/${id}`, project);
return response.data;
}
async deleteProject(id: string): Promise<ApiResponse<void>> {
const response = await this.client.delete(`/projects/${id}`);
return response.data;
}
// Databases
async getDatabases(params?: PaginationParams): Promise<ApiResponse<Database[]>> {
const response = await this.client.get('/databases', { params });
return response.data;
}
async getDatabase(id: string): Promise<ApiResponse<Database>> {
const response = await this.client.get(`/databases/${id}`);
return response.data;
}
async createDatabase(database: Partial<Database>): Promise<ApiResponse<Database>> {
const response = await this.client.post('/databases', database);
return response.data;
}
async updateDatabase(id: string, database: Partial<Database>): Promise<ApiResponse<Database>> {
const response = await this.client.put(`/databases/${id}`, database);
return response.data;
}
async deleteDatabase(id: string): Promise<ApiResponse<void>> {
const response = await this.client.delete(`/databases/${id}`);
return response.data;
}
async testDatabaseConnection(id: string): Promise<ApiResponse<boolean>> {
const response = await this.client.post(`/databases/${id}/test`);
return response.data;
}
// Git repositories
async getGitRepos(params?: PaginationParams): Promise<ApiResponse<Git[]>> {
const response = await this.client.get('/gits', { params });
return response.data;
}
async getGitRepo(id: string): Promise<ApiResponse<Git>> {
const response = await this.client.get(`/gits/${id}`);
return response.data;
}
async createGitRepo(git: Partial<Git>): Promise<ApiResponse<Git>> {
const response = await this.client.post('/gits', git);
return response.data;
}
async updateGitRepo(id: string, git: Partial<Git>): Promise<ApiResponse<Git>> {
const response = await this.client.put(`/gits/${id}`, git);
return response.data;
}
async deleteGitRepo(id: string): Promise<ApiResponse<void>> {
const response = await this.client.delete(`/gits/${id}`);
return response.data;
}
async pullGitRepo(id: string): Promise<ApiResponse<void>> {
const response = await this.client.post(`/gits/${id}/pull`);
return response.data;
}
async cloneGitRepo(id: string): Promise<ApiResponse<void>> {
const response = await this.client.post(`/gits/${id}/clone`);
return response.data;
}
// Statistics
async getSpiderStats(id: string): Promise<ApiResponse<SpiderStat>> {
const response = await this.client.get(`/spiders/${id}/stats`);
return response.data;
}
async getTaskStats(id: string): Promise<ApiResponse<TaskStat>> {
const response = await this.client.get(`/tasks/${id}/stats`);
return response.data;
}
// Health check
async healthCheck(): Promise<boolean> {
try {

View File

@@ -6,6 +6,10 @@ import { configureTaskTools } from "./tools/tasks.js";
import { configureNodeTools } from "./tools/nodes.js";
import { configureScheduleTools } from "./tools/schedules.js";
import { configureSystemTools } from "./tools/system.js";
import { configureProjectTools } from "./tools/projects.js";
import { configureDatabaseTools } from "./tools/databases.js";
import { configureGitTools } from "./tools/git.js";
import { configureStatsTools } from "./tools/stats.js";
export function configureAllTools(server: McpServer, client: CrawlabClient) {
configureSpiderTools(server, client);
@@ -13,4 +17,8 @@ export function configureAllTools(server: McpServer, client: CrawlabClient) {
configureNodeTools(server, client);
configureScheduleTools(server, client);
configureSystemTools(server, client);
configureProjectTools(server, client);
configureDatabaseTools(server, client);
configureGitTools(server, client);
configureStatsTools(server, client);
}

242
mcp/src/tools/databases.ts Normal file
View File

@@ -0,0 +1,242 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { CrawlabClient } from "../client.js";
import { z } from "zod";
const DATABASE_TOOLS = {
list_databases: "crawlab_list_databases",
get_database: "crawlab_get_database",
create_database: "crawlab_create_database",
update_database: "crawlab_update_database",
delete_database: "crawlab_delete_database",
test_database_connection: "crawlab_test_database_connection",
};
export function configureDatabaseTools(server: McpServer, client: CrawlabClient) {
server.tool(
DATABASE_TOOLS.list_databases,
"List all databases in Crawlab",
{
page: z.number().optional().describe("Page number for pagination (default: 1)"),
size: z.number().optional().describe("Number of databases per page (default: 10)"),
},
async ({ page, size }) => {
try {
const response = await client.getDatabases({ page, size });
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error listing databases: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
DATABASE_TOOLS.get_database,
"Get details of a specific database",
{
database_id: z.string().describe("The ID of the database to retrieve"),
},
async ({ database_id }) => {
try {
const response = await client.getDatabase(database_id);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error getting database: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
DATABASE_TOOLS.create_database,
"Create a new database connection",
{
name: z.string().describe("Name of the database connection"),
description: z.string().optional().describe("Description of the database"),
data_source: z.string().describe("Data source type (mongo, mysql, postgres, etc.)"),
host: z.string().describe("Database host"),
port: z.number().describe("Database port"),
database: z.string().optional().describe("Database name"),
username: z.string().optional().describe("Database username"),
password: z.string().optional().describe("Database password"),
uri: z.string().optional().describe("Database URI (alternative to individual connection params)"),
},
async ({ name, description, data_source, host, port, database, username, password, uri }) => {
try {
const databaseData = {
name,
description,
data_source,
host,
port,
database,
username,
password,
uri,
};
const response = await client.createDatabase(databaseData);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error creating database: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
DATABASE_TOOLS.update_database,
"Update an existing database connection",
{
database_id: z.string().describe("The ID of the database to update"),
name: z.string().optional().describe("New name for the database"),
description: z.string().optional().describe("New description for the database"),
data_source: z.string().optional().describe("New data source type"),
host: z.string().optional().describe("New database host"),
port: z.number().optional().describe("New database port"),
database: z.string().optional().describe("New database name"),
username: z.string().optional().describe("New database username"),
password: z.string().optional().describe("New database password"),
uri: z.string().optional().describe("New database URI"),
},
async ({ database_id, name, description, data_source, host, port, database, username, password, uri }) => {
try {
const updateData = {
...(name && { name }),
...(description && { description }),
...(data_source && { data_source }),
...(host && { host }),
...(port && { port }),
...(database && { database }),
...(username && { username }),
...(password && { password }),
...(uri && { uri }),
};
const response = await client.updateDatabase(database_id, updateData);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error updating database: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
DATABASE_TOOLS.delete_database,
"Delete a database connection",
{
database_id: z.string().describe("The ID of the database to delete"),
},
async ({ database_id }) => {
try {
const response = await client.deleteDatabase(database_id);
return {
content: [
{
type: "text",
text: `Database ${database_id} deleted successfully.`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error deleting database: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
DATABASE_TOOLS.test_database_connection,
"Test a database connection",
{
database_id: z.string().describe("The ID of the database to test"),
},
async ({ database_id }) => {
try {
const response = await client.testDatabaseConnection(database_id);
return {
content: [
{
type: "text",
text: `Database connection test result: ${JSON.stringify(response, null, 2)}`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error testing database connection: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
}
export { DATABASE_TOOLS };

258
mcp/src/tools/git.ts Normal file
View File

@@ -0,0 +1,258 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { CrawlabClient } from "../client.js";
import { z } from "zod";
const GIT_TOOLS = {
list_git_repos: "crawlab_list_git_repos",
get_git_repo: "crawlab_get_git_repo",
create_git_repo: "crawlab_create_git_repo",
update_git_repo: "crawlab_update_git_repo",
delete_git_repo: "crawlab_delete_git_repo",
clone_git_repo: "crawlab_clone_git_repo",
pull_git_repo: "crawlab_pull_git_repo",
};
export function configureGitTools(server: McpServer, client: CrawlabClient) {
server.tool(
GIT_TOOLS.list_git_repos,
"List all Git repositories in Crawlab",
{
page: z.number().optional().describe("Page number for pagination (default: 1)"),
size: z.number().optional().describe("Number of repositories per page (default: 10)"),
},
async ({ page, size }) => {
try {
const response = await client.getGitRepos({ page, size });
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error listing Git repositories: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
GIT_TOOLS.get_git_repo,
"Get details of a specific Git repository",
{
git_id: z.string().describe("The ID of the Git repository to retrieve"),
},
async ({ git_id }) => {
try {
const response = await client.getGitRepo(git_id);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error getting Git repository: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
GIT_TOOLS.create_git_repo,
"Create a new Git repository connection",
{
name: z.string().describe("Name of the Git repository"),
url: z.string().describe("Git repository URL"),
auth_type: z.enum(["public", "private", "token"]).optional().describe("Authentication type"),
username: z.string().optional().describe("Git username (for private repos)"),
password: z.string().optional().describe("Git password or token"),
},
async ({ name, url, auth_type, username, password }) => {
try {
const gitData = {
name,
url,
auth_type,
username,
password,
};
const response = await client.createGitRepo(gitData);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error creating Git repository: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
GIT_TOOLS.update_git_repo,
"Update an existing Git repository connection",
{
git_id: z.string().describe("The ID of the Git repository to update"),
name: z.string().optional().describe("New name for the repository"),
url: z.string().optional().describe("New Git repository URL"),
auth_type: z.enum(["public", "private", "token"]).optional().describe("New authentication type"),
username: z.string().optional().describe("New Git username"),
password: z.string().optional().describe("New Git password or token"),
},
async ({ git_id, name, url, auth_type, username, password }) => {
try {
const updateData = {
...(name && { name }),
...(url && { url }),
...(auth_type && { auth_type }),
...(username && { username }),
...(password && { password }),
};
const response = await client.updateGitRepo(git_id, updateData);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error updating Git repository: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
GIT_TOOLS.delete_git_repo,
"Delete a Git repository connection",
{
git_id: z.string().describe("The ID of the Git repository to delete"),
},
async ({ git_id }) => {
try {
const response = await client.deleteGitRepo(git_id);
return {
content: [
{
type: "text",
text: `Git repository ${git_id} deleted successfully.`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error deleting Git repository: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
GIT_TOOLS.clone_git_repo,
"Clone a Git repository",
{
git_id: z.string().describe("The ID of the Git repository to clone"),
},
async ({ git_id }) => {
try {
const response = await client.cloneGitRepo(git_id);
return {
content: [
{
type: "text",
text: `Git repository ${git_id} cloned successfully.`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error cloning Git repository: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
GIT_TOOLS.pull_git_repo,
"Pull latest changes from a Git repository",
{
git_id: z.string().describe("The ID of the Git repository to pull"),
},
async ({ git_id }) => {
try {
const response = await client.pullGitRepo(git_id);
return {
content: [
{
type: "text",
text: `Git repository ${git_id} pulled successfully.`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error pulling Git repository: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
}
export { GIT_TOOLS };

View File

@@ -5,6 +5,9 @@ import { z } from "zod";
const NODE_TOOLS = {
list_nodes: "crawlab_list_nodes",
get_node: "crawlab_get_node",
update_node: "crawlab_update_node",
enable_node: "crawlab_enable_node",
disable_node: "crawlab_disable_node",
};
export function configureNodeTools(server: McpServer, client: CrawlabClient) {
@@ -70,6 +73,109 @@ export function configureNodeTools(server: McpServer, client: CrawlabClient) {
}
}
);
server.tool(
NODE_TOOLS.update_node,
"Update a node configuration",
{
node_id: z.string().describe("The ID of the node to update"),
name: z.string().optional().describe("New name for the node"),
description: z.string().optional().describe("New description for the node"),
max_runners: z.number().optional().describe("New maximum number of concurrent runners"),
enabled: z.boolean().optional().describe("Whether the node is enabled"),
},
async ({ node_id, name, description, max_runners, enabled }) => {
try {
const updateData = {
...(name && { name }),
...(description && { description }),
...(max_runners && { max_runners }),
...(enabled !== undefined && { enabled }),
};
const response = await client.updateNode(node_id, updateData);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error updating node: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
NODE_TOOLS.enable_node,
"Enable a node",
{
node_id: z.string().describe("The ID of the node to enable"),
},
async ({ node_id }) => {
try {
const response = await client.enableNode(node_id);
return {
content: [
{
type: "text",
text: `Node ${node_id} enabled successfully.`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error enabling node: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
NODE_TOOLS.disable_node,
"Disable a node",
{
node_id: z.string().describe("The ID of the node to disable"),
},
async ({ node_id }) => {
try {
const response = await client.disableNode(node_id);
return {
content: [
{
type: "text",
text: `Node ${node_id} disabled successfully.`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error disabling node: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
}
export { NODE_TOOLS };

182
mcp/src/tools/projects.ts Normal file
View File

@@ -0,0 +1,182 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { CrawlabClient } from "../client.js";
import { z } from "zod";
const PROJECT_TOOLS = {
list_projects: "crawlab_list_projects",
get_project: "crawlab_get_project",
create_project: "crawlab_create_project",
update_project: "crawlab_update_project",
delete_project: "crawlab_delete_project",
};
export function configureProjectTools(server: McpServer, client: CrawlabClient) {
server.tool(
PROJECT_TOOLS.list_projects,
"List all projects in Crawlab",
{
page: z.number().optional().describe("Page number for pagination (default: 1)"),
size: z.number().optional().describe("Number of projects per page (default: 10)"),
},
async ({ page, size }) => {
try {
const response = await client.getProjects({ page, size });
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error listing projects: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
PROJECT_TOOLS.get_project,
"Get details of a specific project",
{
project_id: z.string().describe("The ID of the project to retrieve"),
},
async ({ project_id }) => {
try {
const response = await client.getProject(project_id);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error getting project: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
PROJECT_TOOLS.create_project,
"Create a new project",
{
name: z.string().describe("Name of the project"),
description: z.string().optional().describe("Description of the project"),
},
async ({ name, description }) => {
try {
const projectData = {
name,
description,
};
const response = await client.createProject(projectData);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error creating project: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
PROJECT_TOOLS.update_project,
"Update an existing project",
{
project_id: z.string().describe("The ID of the project to update"),
name: z.string().optional().describe("New name for the project"),
description: z.string().optional().describe("New description for the project"),
},
async ({ project_id, name, description }) => {
try {
const updateData = {
...(name && { name }),
...(description && { description }),
};
const response = await client.updateProject(project_id, updateData);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error updating project: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
PROJECT_TOOLS.delete_project,
"Delete a project",
{
project_id: z.string().describe("The ID of the project to delete"),
},
async ({ project_id }) => {
try {
const response = await client.deleteProject(project_id);
return {
content: [
{
type: "text",
text: `Project ${project_id} deleted successfully.`,
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error deleting project: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
}
export { PROJECT_TOOLS };

View File

@@ -86,9 +86,12 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient)
cron: z.string().describe("Cron expression for the schedule (e.g., '0 0 * * *' for daily at midnight)"),
cmd: z.string().optional().describe("Command to override for scheduled runs"),
param: z.string().optional().describe("Parameters to override for scheduled runs"),
mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("Task execution mode"),
node_ids: z.array(z.string()).optional().describe("Node IDs for selected-nodes mode"),
priority: z.number().min(1).max(10).optional().describe("Task priority (1-10)"),
enabled: z.boolean().optional().describe("Whether the schedule is enabled (default: true)"),
},
async ({ name, description, spider_id, cron, cmd, param, enabled = true }) => {
async ({ name, description, spider_id, cron, cmd, param, mode, node_ids, priority, enabled = true }) => {
try {
const scheduleData = {
name,
@@ -97,6 +100,9 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient)
cron,
cmd,
param,
mode,
node_ids,
priority,
enabled,
};
const response = await client.createSchedule(scheduleData);
@@ -133,9 +139,12 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient)
cron: z.string().optional().describe("New cron expression for the schedule"),
cmd: z.string().optional().describe("New command for the schedule"),
param: z.string().optional().describe("New parameters for the schedule"),
mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("New task execution mode"),
node_ids: z.array(z.string()).optional().describe("New node IDs for selected-nodes mode"),
priority: z.number().min(1).max(10).optional().describe("New task priority (1-10)"),
enabled: z.boolean().optional().describe("Whether the schedule is enabled"),
},
async ({ schedule_id, name, description, spider_id, cron, cmd, param, enabled }) => {
async ({ schedule_id, name, description, spider_id, cron, cmd, param, mode, node_ids, priority, enabled }) => {
try {
const updateData = {
...(name && { name }),
@@ -144,6 +153,9 @@ export function configureScheduleTools(server: McpServer, client: CrawlabClient)
...(cron && { cron }),
...(cmd && { cmd }),
...(param && { param }),
...(mode && { mode }),
...(node_ids && { node_ids }),
...(priority && { priority }),
...(enabled !== undefined && { enabled }),
};
const response = await client.updateSchedule(schedule_id, updateData);

View File

@@ -87,10 +87,17 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) {
cmd: z.string().describe("Command to execute the spider"),
param: z.string().optional().describe("Parameters for the spider command"),
project_id: z.string().optional().describe("Project ID to associate with the spider"),
type: z.string().optional().describe("Type of spider (e.g., 'scrapy', 'selenium', 'custom')"),
tags: z.array(z.string()).optional().describe("Tags for categorizing the spider"),
database_id: z.string().optional().describe("Database ID for data storage"),
col_name: z.string().optional().describe("Collection/table name for results"),
db_name: z.string().optional().describe("Database name for results"),
mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("Task execution mode"),
node_ids: z.array(z.string()).optional().describe("Node IDs for selected-nodes mode"),
git_id: z.string().optional().describe("Git repository ID"),
git_root_path: z.string().optional().describe("Git root path"),
template: z.string().optional().describe("Spider template"),
priority: z.number().min(1).max(10).optional().describe("Priority (1-10, default: 5)"),
},
async ({ name, description, cmd, param, project_id, type, tags }) => {
async ({ name, description, cmd, param, project_id, database_id, col_name, db_name, mode, node_ids, git_id, git_root_path, template, priority }) => {
try {
const spiderData = {
name,
@@ -98,8 +105,15 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) {
cmd,
param,
project_id,
type,
tags,
database_id,
col_name,
db_name,
mode,
node_ids,
git_id,
git_root_path,
template,
priority: priority || 5,
};
const response = await client.createSpider(spiderData);
return {
@@ -134,10 +148,17 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) {
cmd: z.string().optional().describe("New command to execute the spider"),
param: z.string().optional().describe("New parameters for the spider command"),
project_id: z.string().optional().describe("New project ID to associate with the spider"),
type: z.string().optional().describe("New type of spider"),
tags: z.array(z.string()).optional().describe("New tags for the spider"),
database_id: z.string().optional().describe("New database ID for data storage"),
col_name: z.string().optional().describe("New collection/table name for results"),
db_name: z.string().optional().describe("New database name for results"),
mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("New task execution mode"),
node_ids: z.array(z.string()).optional().describe("New node IDs for selected-nodes mode"),
git_id: z.string().optional().describe("New git repository ID"),
git_root_path: z.string().optional().describe("New git root path"),
template: z.string().optional().describe("New spider template"),
priority: z.number().min(1).max(10).optional().describe("New priority (1-10)"),
},
async ({ spider_id, name, description, cmd, param, project_id, type, tags }) => {
async ({ spider_id, name, description, cmd, param, project_id, database_id, col_name, db_name, mode, node_ids, git_id, git_root_path, template, priority }) => {
try {
const updateData = {
...(name && { name }),
@@ -145,8 +166,15 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) {
...(cmd && { cmd }),
...(param && { param }),
...(project_id && { project_id }),
...(type && { type }),
...(tags && { tags }),
...(database_id && { database_id }),
...(col_name && { col_name }),
...(db_name && { db_name }),
...(mode && { mode }),
...(node_ids && { node_ids }),
...(git_id && { git_id }),
...(git_root_path && { git_root_path }),
...(template && { template }),
...(priority && { priority }),
};
const response = await client.updateSpider(spider_id, updateData);
return {
@@ -209,14 +237,18 @@ export function configureSpiderTools(server: McpServer, client: CrawlabClient) {
spider_id: z.string().describe("The ID of the spider to run"),
cmd: z.string().optional().describe("Override command for this run"),
param: z.string().optional().describe("Override parameters for this run"),
priority: z.number().optional().describe("Priority of the task (1-10, higher = more priority)"),
priority: z.number().min(1).max(10).optional().describe("Task priority (1-10)"),
mode: z.enum(["random", "all", "selected-nodes"]).optional().describe("Task execution mode"),
node_ids: z.array(z.string()).optional().describe("Node IDs for selected-nodes mode"),
},
async ({ spider_id, cmd, param, priority }) => {
async ({ spider_id, cmd, param, priority, mode, node_ids }) => {
try {
const runData = {
...(cmd && { cmd }),
...(param && { param }),
...(priority && { priority }),
...(mode && { mode }),
...(node_ids && { node_ids }),
};
const response = await client.runSpider(spider_id, runData);
return {

74
mcp/src/tools/stats.ts Normal file
View File

@@ -0,0 +1,74 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { CrawlabClient } from "../client.js";
import { z } from "zod";
const STATS_TOOLS = {
get_spider_stats: "crawlab_get_spider_stats",
get_task_stats: "crawlab_get_task_stats",
};
export function configureStatsTools(server: McpServer, client: CrawlabClient) {
server.tool(
STATS_TOOLS.get_spider_stats,
"Get statistics for a specific spider",
{
spider_id: z.string().describe("The ID of the spider to get statistics for"),
},
async ({ spider_id }) => {
try {
const response = await client.getSpiderStats(spider_id);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error getting spider statistics: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
server.tool(
STATS_TOOLS.get_task_stats,
"Get statistics for a specific task",
{
task_id: z.string().describe("The ID of the task to get statistics for"),
},
async ({ task_id }) => {
try {
const response = await client.getTaskStats(task_id);
return {
content: [
{
type: "text",
text: JSON.stringify(response, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error getting task statistics: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
}
export { STATS_TOOLS };

View File

@@ -4,6 +4,7 @@ import { CrawlabClient } from "../client.js";
const SYSTEM_TOOLS = {
health_check: "crawlab_health_check",
system_status: "crawlab_system_status",
get_system_overview: "crawlab_get_system_overview",
};
export function configureSystemTools(server: McpServer, client: CrawlabClient) {
@@ -64,8 +65,11 @@ export function configureSystemTools(server: McpServer, client: CrawlabClient) {
total_recent: tasksResponse.total || 0,
running: tasksResponse.data?.filter(task => task.status === 'running').length || 0,
pending: tasksResponse.data?.filter(task => task.status === 'pending').length || 0,
completed: tasksResponse.data?.filter(task => task.status === 'success').length || 0,
failed: tasksResponse.data?.filter(task => task.status === 'error').length || 0,
assigned: tasksResponse.data?.filter(task => task.status === 'assigned').length || 0,
finished: tasksResponse.data?.filter(task => task.status === 'finished').length || 0,
error: tasksResponse.data?.filter(task => task.status === 'error').length || 0,
cancelled: tasksResponse.data?.filter(task => task.status === 'cancelled').length || 0,
abnormal: tasksResponse.data?.filter(task => task.status === 'abnormal').length || 0,
},
schedules: {
total: schedulesResponse.total || 0,
@@ -96,6 +100,78 @@ export function configureSystemTools(server: McpServer, client: CrawlabClient) {
}
}
);
server.tool(
SYSTEM_TOOLS.get_system_overview,
"Get a high-level overview of the Crawlab system including projects, spiders, nodes, and activity",
{},
async () => {
try {
// Get comprehensive system data
const [projectsResponse, spidersResponse, nodesResponse, tasksResponse, schedulesResponse] = await Promise.all([
client.getProjects({ page: 1, size: 100 }),
client.getSpiders({ page: 1, size: 100 }),
client.getNodes({ page: 1, size: 50 }),
client.getTasks({ page: 1, size: 50 }),
client.getSchedules({ page: 1, size: 100 }),
]);
const overview = {
timestamp: new Date().toISOString(),
system_health: await client.healthCheck(),
summary: {
projects: projectsResponse.total || 0,
spiders: spidersResponse.total || 0,
nodes: nodesResponse.total || 0,
recent_tasks: tasksResponse.total || 0,
schedules: schedulesResponse.total || 0,
},
nodes_detail: {
total: nodesResponse.total || 0,
master_nodes: nodesResponse.data?.filter(node => node.is_master).length || 0,
worker_nodes: nodesResponse.data?.filter(node => !node.is_master).length || 0,
active: nodesResponse.data?.filter(node => node.active).length || 0,
enabled: nodesResponse.data?.filter(node => node.enabled).length || 0,
},
tasks_detail: {
by_status: {
pending: tasksResponse.data?.filter(task => task.status === 'pending').length || 0,
assigned: tasksResponse.data?.filter(task => task.status === 'assigned').length || 0,
running: tasksResponse.data?.filter(task => task.status === 'running').length || 0,
finished: tasksResponse.data?.filter(task => task.status === 'finished').length || 0,
error: tasksResponse.data?.filter(task => task.status === 'error').length || 0,
cancelled: tasksResponse.data?.filter(task => task.status === 'cancelled').length || 0,
abnormal: tasksResponse.data?.filter(task => task.status === 'abnormal').length || 0,
}
},
schedules_detail: {
total: schedulesResponse.total || 0,
enabled: schedulesResponse.data?.filter(schedule => schedule.enabled).length || 0,
disabled: schedulesResponse.data?.filter(schedule => !schedule.enabled).length || 0,
},
};
return {
content: [
{
type: "text",
text: JSON.stringify(overview, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: "text",
text: `Error getting system overview: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
);
}
export { SYSTEM_TOOLS };

View File

@@ -20,7 +20,7 @@ export function configureTaskTools(server: McpServer, client: CrawlabClient) {
page: z.number().optional().describe("Page number for pagination (default: 1)"),
size: z.number().optional().describe("Number of tasks per page (default: 10)"),
spider_id: z.string().optional().describe("Filter by spider ID"),
status: z.string().optional().describe("Filter by task status (pending, running, success, error, cancelled)"),
status: z.string().optional().describe("Filter by task status (pending, assigned, running, finished, error, cancelled, abnormal)"),
},
async ({ page, size, spider_id, status }) => {
try {