first commit
backend/package-lock.json (new file, generated, 3114 lines)
File diff suppressed because it is too large
backend/package.json (new file, 40 lines)
@@ -0,0 +1,40 @@
{
  "name": "osint-backend",
  "version": "1.0.0",
  "description": "OSINT Platform Backend",
  "type": "module",
  "main": "dist/index.js",
  "scripts": {
    "dev": "tsx watch src/index.ts",
    "build": "tsc",
    "start": "node dist/index.js"
  },
  "dependencies": {
    "express": "^4.18.2",
    "cors": "^2.8.5",
    "helmet": "^7.1.0",
    "bcryptjs": "^2.4.3",
    "jsonwebtoken": "^9.0.2",
    "cookie-parser": "^1.4.6",
    "mongoose": "^8.2.1",
    "playwright": "^1.42.1",
    "playwright-extra": "^4.3.6",
    "puppeteer-extra-plugin-stealth": "^2.11.2",
    "dotenv": "^16.4.5",
    "uuid": "^9.0.1",
    "winston": "^3.11.0",
    "express-rate-limit": "^7.2.0",
    "socket.io": "^4.7.5"
  },
  "devDependencies": {
    "@types/express": "^4.17.21",
    "@types/cors": "^2.8.17",
    "@types/bcryptjs": "^2.4.6",
    "@types/jsonwebtoken": "^9.0.6",
    "@types/cookie-parser": "^1.4.7",
    "@types/uuid": "^9.0.8",
    "@types/node": "^20.11.24",
    "typescript": "^5.4.2",
    "tsx": "^4.7.1"
  }
}
backend/src/database/index.ts (new file, 35 lines)
@@ -0,0 +1,35 @@
import mongoose from 'mongoose';
import { logger } from '../utils/logger.js';

export async function connectDatabase(): Promise<void> {
  const mongoUri = process.env.MONGODB_URI;

  if (!mongoUri) {
    throw new Error('MONGODB_URI environment variable is required');
  }

  try {
    await mongoose.connect(mongoUri, {
      dbName: 'osint_platform',
    });

    logger.info('Connected to MongoDB');

    mongoose.connection.on('error', (err) => {
      logger.error('MongoDB connection error:', err);
    });

    mongoose.connection.on('disconnected', () => {
      logger.warn('MongoDB disconnected');
    });

  } catch (error) {
    logger.error('Failed to connect to MongoDB:', error);
    throw error;
  }
}

export async function closeDatabase(): Promise<void> {
  await mongoose.connection.close();
  logger.info('MongoDB connection closed');
}
backend/src/index.ts (new file, 131 lines)
@@ -0,0 +1,131 @@
import express from 'express';
import cors from 'cors';
import helmet from 'helmet';
import cookieParser from 'cookie-parser';
import { createServer } from 'http';
import { Server } from 'socket.io';
import dotenv from 'dotenv';
import path from 'path';
import { fileURLToPath } from 'url';

import { authRouter } from './routes/auth.js';
import { targetsRouter } from './routes/targets.js';
import { sessionsRouter } from './routes/sessions.js';
import { scraperRouter } from './routes/scraper.js';
import { authMiddleware } from './middleware/auth.js';
import { connectDatabase } from './database/index.js';
import { logger } from './utils/logger.js';
import { ScraperManager } from './scraper/manager.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

dotenv.config({ path: path.join(__dirname, '../../.env') });

const app = express();
const httpServer = createServer(app);
const io = new Server(httpServer, {
  cors: {
    origin: process.env.FRONTEND_URL || 'http://localhost:5173',
    credentials: true,
  },
});

const PORT = process.env.PORT || 3001;

// Initialize scraper manager with socket.io
const scraperManager = new ScraperManager(io);
app.set('scraperManager', scraperManager);
app.set('io', io);

// Middleware
app.use(helmet({
  contentSecurityPolicy: false,
}));
app.use(cors({
  origin: process.env.FRONTEND_URL || 'http://localhost:5173',
  credentials: true,
}));
app.use(express.json());
app.use(cookieParser());

// Serve static files from frontend build in production
if (process.env.NODE_ENV === 'production') {
  app.use(express.static(path.join(__dirname, '../../frontend/dist')));
}

// API Routes
app.use('/api/auth', authRouter);
app.use('/api/targets', authMiddleware, targetsRouter);
app.use('/api/sessions', authMiddleware, sessionsRouter);
app.use('/api/scraper', authMiddleware, scraperRouter);

// Health check
app.get('/api/health', (req, res) => {
  res.json({ status: 'ok', timestamp: new Date().toISOString() });
});

// Catch-all for SPA routing in production
if (process.env.NODE_ENV === 'production') {
  app.get('*', (req, res) => {
    res.sendFile(path.join(__dirname, '../../frontend/dist/index.html'));
  });
}

// Socket.io authentication
io.use((socket, next) => {
  const token = socket.handshake.auth.token;
  if (!token) {
    return next(new Error('Authentication required'));
  }
  // Token validation would go here
  next();
});

io.on('connection', (socket) => {
  logger.info(`Client connected: ${socket.id}`);

  socket.on('subscribe:scraper', (scraperId: string) => {
    socket.join(`scraper:${scraperId}`);
  });

  socket.on('unsubscribe:scraper', (scraperId: string) => {
    socket.leave(`scraper:${scraperId}`);
  });

  socket.on('disconnect', () => {
    logger.info(`Client disconnected: ${socket.id}`);
  });
});

// Global error handlers
process.on('unhandledRejection', (reason, promise) => {
  logger.error('Unhandled Rejection at:', promise, 'reason:', reason);
});

process.on('uncaughtException', (error) => {
  logger.error('Uncaught Exception:', error);
  // Important: give the logger time to write before exiting
  setTimeout(() => {
    process.exit(1);
  }, 1000);
});

// Start server with database connection
async function start() {
  try {
    await connectDatabase();

    httpServer.listen(PORT, () => {
      logger.info(`🔒 OSINT Platform Backend running on port ${PORT}`);
      logger.info(`📊 Environment: ${process.env.NODE_ENV || 'development'}`);
    });
  } catch (error) {
    logger.error('Failed to start server:', error);
    process.exit(1);
  }
}

start();

export { app, io };
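Note: the Socket.io wiring above (the handshake auth check, 'subscribe:scraper', and the 'scraper:log' / 'scraper:status' events emitted by the scraper manager later in this commit) implies a client roughly like the following. This is a minimal sketch, not part of the commit; it assumes the socket.io-client package and the default dev port 3001.

import { io } from 'socket.io-client';

const token = 'REPLACE_WITH_JWT';   // hypothetical: a token returned by POST /api/auth/login
const jobId = 'REPLACE_WITH_ID';    // hypothetical: a job id returned by POST /api/scraper/start

// The server's io.use() hook rejects connections whose handshake carries no auth token.
const socket = io('http://localhost:3001', { auth: { token } });

// Join the per-job room and stream log lines and status updates as they are pushed.
socket.emit('subscribe:scraper', jobId);
socket.on('scraper:log', (entry) => console.log(entry.level, entry.message));
socket.on('scraper:status', (update) => console.log(update.status, update.progress));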
backend/src/middleware/auth.ts (new file, 55 lines)
@@ -0,0 +1,55 @@
import { Request, Response, NextFunction } from 'express';
import jwt from 'jsonwebtoken';
import { logger } from '../utils/logger.js';

export interface AuthRequest extends Request {
  userId?: string;
}

export function authMiddleware(req: AuthRequest, res: Response, next: NextFunction): void {
  try {
    // Check for token in Authorization header or cookie
    const authHeader = req.headers.authorization;
    const cookieToken = req.cookies?.auth_token;

    let token: string | undefined;

    if (authHeader && authHeader.startsWith('Bearer ')) {
      token = authHeader.substring(7);
    } else if (cookieToken) {
      token = cookieToken;
    }

    if (!token) {
      res.status(401).json({ error: 'Authentication required' });
      return;
    }

    const jwtSecret = process.env.JWT_SECRET;
    if (!jwtSecret) {
      logger.error('JWT_SECRET not configured');
      res.status(500).json({ error: 'Server configuration error' });
      return;
    }

    const decoded = jwt.verify(token, jwtSecret) as { authenticated: boolean };

    if (!decoded.authenticated) {
      res.status(401).json({ error: 'Invalid token' });
      return;
    }

    next();
  } catch (error) {
    if (error instanceof jwt.TokenExpiredError) {
      res.status(401).json({ error: 'Token expired' });
      return;
    }
    if (error instanceof jwt.JsonWebTokenError) {
      res.status(401).json({ error: 'Invalid token' });
      return;
    }
    logger.error('Auth middleware error:', error);
    res.status(500).json({ error: 'Authentication error' });
  }
}
backend/src/models/ScraperJob.ts (new file, 52 lines)
@@ -0,0 +1,52 @@
import mongoose, { Document, Schema } from 'mongoose';

export interface IScraperLog {
  level: string;
  message: string;
  timestamp: Date;
}

export interface IScraperJob extends Document {
  targetId?: mongoose.Types.ObjectId;
  profileId?: mongoose.Types.ObjectId;
  platform: string;
  status: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
  progress: number;
  result?: Record<string, any>;
  error?: string;
  logs: IScraperLog[];
  startedAt?: Date;
  completedAt?: Date;
  createdAt: Date;
}

const ScraperLogSchema = new Schema<IScraperLog>({
  level: { type: String, required: true },
  message: { type: String, required: true },
  timestamp: { type: Date, default: Date.now },
}, { _id: false });

const ScraperJobSchema = new Schema<IScraperJob>({
  targetId: { type: Schema.Types.ObjectId, ref: 'Target' },
  profileId: { type: Schema.Types.ObjectId },
  platform: { type: String, required: true },
  status: {
    type: String,
    enum: ['pending', 'running', 'completed', 'failed', 'cancelled'],
    default: 'pending'
  },
  progress: { type: Number, default: 0 },
  result: { type: Schema.Types.Mixed },
  error: { type: String },
  logs: [ScraperLogSchema],
  startedAt: { type: Date },
  completedAt: { type: Date },
}, {
  timestamps: true,
});

// Index for efficient querying
ScraperJobSchema.index({ status: 1, createdAt: -1 });
ScraperJobSchema.index({ targetId: 1 });

export const ScraperJob = mongoose.model<IScraperJob>('ScraperJob', ScraperJobSchema);
backend/src/models/Session.ts (new file, 33 lines)
@@ -0,0 +1,33 @@
import mongoose, { Document, Schema } from 'mongoose';

export interface ISession extends Document {
  platform: string;
  sessionName: string;
  cookiesEncrypted: string;
  localStorageEncrypted?: string;
  userAgent?: string;
  proxy?: string;
  status: 'active' | 'expired' | 'invalid';
  lastValidated?: Date;
  createdAt: Date;
  updatedAt: Date;
}

const SessionSchema = new Schema<ISession>({
  platform: { type: String, required: true, unique: true },
  sessionName: { type: String, required: true },
  cookiesEncrypted: { type: String, required: true },
  localStorageEncrypted: { type: String },
  userAgent: { type: String },
  proxy: { type: String },
  status: {
    type: String,
    enum: ['active', 'expired', 'invalid'],
    default: 'active'
  },
  lastValidated: { type: Date },
}, {
  timestamps: true,
});

export const Session = mongoose.model<ISession>('Session', SessionSchema);
backend/src/models/Target.ts (new file, 47 lines)
@@ -0,0 +1,47 @@
import mongoose, { Document, Schema } from 'mongoose';

export interface ISocialProfile {
  _id: mongoose.Types.ObjectId;
  platform: string;
  username?: string;
  profileUrl?: string;
  profileData?: Record<string, any>;
  lastScraped?: Date;
  createdAt: Date;
}

export interface ITarget extends Document {
  name: string;
  notes?: string;
  profiles: ISocialProfile[];
  createdAt: Date;
  updatedAt: Date;
}

const SocialProfileSchema = new Schema<ISocialProfile>({
  platform: { type: String, required: true },
  username: { type: String },
  profileUrl: { type: String },
  profileData: { type: Schema.Types.Mixed },
  lastScraped: { type: Date },
  createdAt: { type: Date, default: Date.now },
});

const TargetSchema = new Schema<ITarget>({
  name: { type: String, required: true },
  notes: { type: String },
  profiles: [SocialProfileSchema],
}, {
  timestamps: true,
});

// Virtual for profile count
TargetSchema.virtual('profileCount').get(function() {
  return this.profiles?.length || 0;
});

// Ensure virtuals are serialized
TargetSchema.set('toJSON', { virtuals: true });
TargetSchema.set('toObject', { virtuals: true });

export const Target = mongoose.model<ITarget>('Target', TargetSchema);
backend/src/models/index.ts (new file, 3 lines)
@@ -0,0 +1,3 @@
export { Target, type ITarget, type ISocialProfile } from './Target.js';
export { Session, type ISession } from './Session.js';
export { ScraperJob, type IScraperJob, type IScraperLog } from './ScraperJob.js';
backend/src/routes/auth.ts (new file, 113 lines)
@@ -0,0 +1,113 @@
import { Router, Request, Response } from 'express';
import bcrypt from 'bcryptjs';
import jwt from 'jsonwebtoken';
import rateLimit from 'express-rate-limit';
import { logger } from '../utils/logger.js';

export const authRouter = Router();

// Rate limiting for auth endpoints
const authLimiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 5, // 5 attempts per window
  message: { error: 'Too many authentication attempts, please try again later' },
  standardHeaders: true,
  legacyHeaders: false,
});

// Login with master password
authRouter.post('/login', authLimiter, async (req: Request, res: Response) => {
  try {
    const { password } = req.body;

    if (!password) {
      res.status(400).json({ error: 'Password is required' });
      return;
    }

    const masterPassword = process.env.MASTER_PASSWORD;
    if (!masterPassword) {
      logger.error('MASTER_PASSWORD not configured');
      res.status(500).json({ error: 'Server configuration error' });
      return;
    }

    // Simple comparison for now - in production you'd hash the stored password
    const isValid = password === masterPassword;

    if (!isValid) {
      logger.warn('Failed login attempt');
      res.status(401).json({ error: 'Invalid password' });
      return;
    }

    const jwtSecret = process.env.JWT_SECRET;
    if (!jwtSecret) {
      logger.error('JWT_SECRET not configured');
      res.status(500).json({ error: 'Server configuration error' });
      return;
    }

    const token = jwt.sign(
      { authenticated: true },
      jwtSecret as string,
      { expiresIn: (process.env.SESSION_EXPIRY || '24h') as any }
    );

    // Set HTTP-only cookie
    res.cookie('auth_token', token, {
      httpOnly: true,
      secure: process.env.NODE_ENV === 'production',
      sameSite: 'strict',
      maxAge: 24 * 60 * 60 * 1000, // 24 hours
    });

    logger.info('Successful login');
    res.json({
      success: true,
      token,
      expiresIn: process.env.SESSION_EXPIRY || '24h'
    });
  } catch (error) {
    logger.error('Login error:', error);
    res.status(500).json({ error: 'Authentication failed' });
  }
});

// Verify token
authRouter.get('/verify', (req: Request, res: Response) => {
  try {
    const authHeader = req.headers.authorization;
    const cookieToken = req.cookies?.auth_token;

    let token: string | undefined;

    if (authHeader && authHeader.startsWith('Bearer ')) {
      token = authHeader.substring(7);
    } else if (cookieToken) {
      token = cookieToken;
    }

    if (!token) {
      res.status(401).json({ authenticated: false });
      return;
    }

    const jwtSecret = process.env.JWT_SECRET;
    if (!jwtSecret) {
      res.status(500).json({ error: 'Server configuration error' });
      return;
    }

    jwt.verify(token, jwtSecret);
    res.json({ authenticated: true });
  } catch (error) {
    res.status(401).json({ authenticated: false });
  }
});

// Logout
authRouter.post('/logout', (req: Request, res: Response) => {
  res.clearCookie('auth_token');
  res.json({ success: true });
});
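For reference, the login route above is both cookie-based and token-based. A minimal client-side call, as a sketch that is not part of the commit (it assumes the default dev port and a browser-like fetch):

// Sends the master password; credentials: 'include' lets the browser keep the
// HTTP-only auth_token cookie that the server sets on success.
const res = await fetch('http://localhost:3001/api/auth/login', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  credentials: 'include',
  body: JSON.stringify({ password: 'REPLACE_WITH_MASTER_PASSWORD' }), // placeholder
});
const { token } = await res.json(); // also usable as an "Authorization: Bearer <token>" header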
backend/src/routes/scraper.ts (new file, 199 lines)
@@ -0,0 +1,199 @@
import { Router, Request, Response } from 'express';
import mongoose from 'mongoose';
import { ScraperJob } from '../models/ScraperJob.js';
import { Target } from '../models/Target.js';
import { logger } from '../utils/logger.js';
import { ScraperManager } from '../scraper/manager.js';

export const scraperRouter = Router();

// Get all jobs
scraperRouter.get('/jobs', async (req: Request, res: Response) => {
  try {
    const limit = parseInt(req.query.limit as string) || 50;

    const jobs = await ScraperJob.find()
      .sort({ createdAt: -1 })
      .limit(limit)
      .lean();

    // Populate target names
    const targetIds = [...new Set(jobs.filter(j => j.targetId).map(j => j.targetId!.toString()))];
    const targets = await Target.find({ _id: { $in: targetIds } }).select('name').lean();
    const targetMap = new Map(targets.map(t => [t._id.toString(), t.name]));

    const formattedJobs = jobs.map(job => ({
      id: job._id,
      target_id: job.targetId,
      profile_id: job.profileId,
      platform: job.platform,
      status: job.status,
      progress: job.progress,
      result: job.result ? JSON.stringify(job.result) : null,
      error: job.error,
      target_name: job.targetId ? targetMap.get(job.targetId.toString()) : null,
      started_at: job.startedAt,
      completed_at: job.completedAt,
      created_at: job.createdAt,
    }));

    res.json(formattedJobs);
  } catch (error) {
    logger.error('Error fetching jobs:', error);
    res.status(500).json({ error: 'Failed to fetch jobs' });
  }
});

// Get job by ID with logs
scraperRouter.get('/jobs/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const job = await ScraperJob.findById(id).lean();

    if (!job) {
      res.status(404).json({ error: 'Job not found' });
      return;
    }

    // Get target name
    let targetName = null;
    if (job.targetId) {
      const target = await Target.findById(job.targetId).select('name').lean();
      targetName = target?.name;
    }

    res.json({
      id: job._id,
      target_id: job.targetId,
      profile_id: job.profileId,
      platform: job.platform,
      status: job.status,
      progress: job.progress,
      result: job.result,
      error: job.error,
      target_name: targetName,
      started_at: job.startedAt,
      completed_at: job.completedAt,
      created_at: job.createdAt,
      logs: job.logs,
    });
  } catch (error) {
    logger.error('Error fetching job:', error);
    res.status(500).json({ error: 'Failed to fetch job' });
  }
});

// Start a new scrape job
scraperRouter.post('/start', async (req: Request, res: Response) => {
  try {
    const { target_id, profile_id, platform, profile_url } = req.body;

    if (!platform) {
      res.status(400).json({ error: 'Platform is required' });
      return;
    }

    // Create job record
    const job = new ScraperJob({
      targetId: target_id ? new mongoose.Types.ObjectId(target_id) : undefined,
      profileId: profile_id ? new mongoose.Types.ObjectId(profile_id) : undefined,
      platform,
      status: 'pending',
      progress: 0,
      logs: [],
    });
    await job.save();

    // Get scraper manager and start job
    const scraperManager = req.app.get('scraperManager') as ScraperManager;

    scraperManager.startJob({
      jobId: job._id.toString(),
      platform,
      profileUrl: profile_url,
      targetId: target_id,
      profileId: profile_id,
    });

    logger.info(`Started scraper job: ${job._id} for ${platform}`);
    res.status(201).json({
      id: job._id,
      platform: job.platform,
      status: job.status,
      progress: job.progress,
      created_at: job.createdAt,
    });
  } catch (error) {
    logger.error('Error starting job:', error);
    res.status(500).json({ error: 'Failed to start job' });
  }
});

// Cancel a job
scraperRouter.post('/jobs/:id/cancel', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const scraperManager = req.app.get('scraperManager') as ScraperManager;
    await scraperManager.cancelJob(id);

    const job = await ScraperJob.findById(id).lean();

    res.json({
      id: job?._id,
      status: job?.status,
    });
  } catch (error) {
    logger.error('Error cancelling job:', error);
    res.status(500).json({ error: 'Failed to cancel job' });
  }
});

// Get active jobs summary
scraperRouter.get('/status', async (req: Request, res: Response) => {
  try {
    const [pending, running, completed, failed] = await Promise.all([
      ScraperJob.countDocuments({ status: 'pending' }),
      ScraperJob.countDocuments({ status: 'running' }),
      ScraperJob.countDocuments({ status: 'completed' }),
      ScraperJob.countDocuments({ status: 'failed' }),
    ]);

    const activeJobs = await ScraperJob.find({ status: { $in: ['pending', 'running'] } })
      .sort({ createdAt: -1 })
      .select('_id platform status progress startedAt')
      .lean();

    res.json({
      counts: { pending, running, completed, failed },
      activeJobs: activeJobs.map(j => ({
        id: j._id,
        platform: j.platform,
        status: j.status,
        progress: j.progress,
        started_at: j.startedAt,
      })),
    });
  } catch (error) {
    logger.error('Error fetching scraper status:', error);
    res.status(500).json({ error: 'Failed to fetch status' });
  }
});

// Get logs for a job
scraperRouter.get('/jobs/:id/logs', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const job = await ScraperJob.findById(id).select('logs').lean();

    if (!job) {
      res.status(404).json({ error: 'Job not found' });
      return;
    }

    res.json(job.logs || []);
  } catch (error) {
    logger.error('Error fetching logs:', error);
    res.status(500).json({ error: 'Failed to fetch logs' });
  }
});
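A minimal sketch of kicking off a job against the /start route above (not part of the commit; the field names mirror the destructuring in the handler, and the Bearer token is assumed to come from /api/auth/login):

const token = 'REPLACE_WITH_JWT';                   // placeholder token from /api/auth/login
const res = await fetch('http://localhost:3001/api/scraper/start', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${token}`,
  },
  body: JSON.stringify({
    platform: 'twitter',                            // the only required field
    profile_url: 'https://x.com/some_profile',      // hypothetical example URL
  }),
});
const job = await res.json();                       // { id, platform, status, progress, created_at }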
backend/src/routes/sessions.ts (new file, 225 lines)
@@ -0,0 +1,225 @@
import { Router, Request, Response } from 'express';
import { Session } from '../models/Session.js';
import { encrypt, decrypt } from '../utils/encryption.js';
import { logger } from '../utils/logger.js';

export const sessionsRouter = Router();

// Get all sessions (without sensitive data)
sessionsRouter.get('/', async (req: Request, res: Response) => {
  try {
    const sessions = await Session.find()
      .select('-cookiesEncrypted -localStorageEncrypted')
      .sort({ updatedAt: -1 })
      .lean();

    const formattedSessions = sessions.map(s => ({
      id: s._id,
      platform: s.platform,
      session_name: s.sessionName,
      user_agent: s.userAgent,
      proxy: s.proxy,
      status: s.status,
      last_validated: s.lastValidated,
      created_at: s.createdAt,
      updated_at: s.updatedAt,
    }));

    res.json(formattedSessions);
  } catch (error) {
    logger.error('Error fetching sessions:', error);
    res.status(500).json({ error: 'Failed to fetch sessions' });
  }
});

// Get session by platform
sessionsRouter.get('/platform/:platform', async (req: Request, res: Response) => {
  try {
    const { platform } = req.params;
    const session = await Session.findOne({ platform })
      .select('-cookiesEncrypted -localStorageEncrypted')
      .lean();

    if (!session) {
      res.status(404).json({ error: 'Session not found for platform' });
      return;
    }

    res.json({
      id: session._id,
      platform: session.platform,
      session_name: session.sessionName,
      user_agent: session.userAgent,
      proxy: session.proxy,
      status: session.status,
      last_validated: session.lastValidated,
      created_at: session.createdAt,
      updated_at: session.updatedAt,
    });
  } catch (error) {
    logger.error('Error fetching session:', error);
    res.status(500).json({ error: 'Failed to fetch session' });
  }
});

// Create or update session
sessionsRouter.post('/', async (req: Request, res: Response) => {
  try {
    const { platform, session_name, cookies, local_storage, user_agent, proxy } = req.body;

    if (!platform || !session_name || !cookies) {
      res.status(400).json({ error: 'Platform, session_name, and cookies are required' });
      return;
    }

    // Encrypt sensitive data
    const cookiesEncrypted = encrypt(JSON.stringify(cookies));
    const localStorageEncrypted = local_storage ? encrypt(JSON.stringify(local_storage)) : undefined;

    // Upsert session
    const session = await Session.findOneAndUpdate(
      { platform },
      {
        platform,
        sessionName: session_name,
        cookiesEncrypted,
        localStorageEncrypted,
        userAgent: user_agent,
        proxy,
        status: 'active',
      },
      { upsert: true, new: true }
    ).lean();

    logger.info(`Saved session for ${platform}`);

    res.status(201).json({
      id: session._id,
      platform: session.platform,
      session_name: session.sessionName,
      user_agent: session.userAgent,
      proxy: session.proxy,
      status: session.status,
      created_at: session.createdAt,
      updated_at: session.updatedAt,
    });
  } catch (error) {
    logger.error('Error saving session:', error);
    res.status(500).json({ error: 'Failed to save session' });
  }
});

// Get decrypted session data (for internal scraper use)
sessionsRouter.get('/decrypt/:platform', async (req: Request, res: Response) => {
  try {
    const { platform } = req.params;
    const session = await Session.findOne({ platform }).lean();

    if (!session) {
      res.status(404).json({ error: 'Session not found' });
      return;
    }

    const cookies = JSON.parse(decrypt(session.cookiesEncrypted));
    const localStorage = session.localStorageEncrypted
      ? JSON.parse(decrypt(session.localStorageEncrypted))
      : null;

    res.json({
      id: session._id,
      platform: session.platform,
      session_name: session.sessionName,
      cookies,
      localStorage,
      user_agent: session.userAgent,
      proxy: session.proxy,
      status: session.status,
    });
  } catch (error) {
    logger.error('Error decrypting session:', error);
    res.status(500).json({ error: 'Failed to decrypt session' });
  }
});

// Validate session
sessionsRouter.post('/validate/:platform', async (req: Request, res: Response) => {
  try {
    const { platform } = req.params;

    const session = await Session.findOneAndUpdate(
      { platform },
      { lastValidated: new Date() },
      { new: true }
    ).select('-cookiesEncrypted -localStorageEncrypted').lean();

    if (!session) {
      res.status(404).json({ error: 'Session not found' });
      return;
    }

    res.json({
      id: session._id,
      platform: session.platform,
      session_name: session.sessionName,
      status: session.status,
      last_validated: session.lastValidated,
    });
  } catch (error) {
    logger.error('Error validating session:', error);
    res.status(500).json({ error: 'Failed to validate session' });
  }
});

// Update session status
sessionsRouter.patch('/:id/status', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { status } = req.body;

    if (!status || !['active', 'expired', 'invalid'].includes(status)) {
      res.status(400).json({ error: 'Valid status required (active, expired, invalid)' });
      return;
    }

    const session = await Session.findByIdAndUpdate(
      id,
      { status },
      { new: true }
    ).select('-cookiesEncrypted -localStorageEncrypted').lean();

    if (!session) {
      res.status(404).json({ error: 'Session not found' });
      return;
    }

    res.json({
      id: session._id,
      platform: session.platform,
      session_name: session.sessionName,
      status: session.status,
      updated_at: session.updatedAt,
    });
  } catch (error) {
    logger.error('Error updating session status:', error);
    res.status(500).json({ error: 'Failed to update session status' });
  }
});

// Delete session
sessionsRouter.delete('/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const result = await Session.findByIdAndDelete(id);

    if (!result) {
      res.status(404).json({ error: 'Session not found' });
      return;
    }

    logger.info(`Deleted session: ${id}`);
    res.json({ success: true });
  } catch (error) {
    logger.error('Error deleting session:', error);
    res.status(500).json({ error: 'Failed to delete session' });
  }
});
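A sketch of storing a captured browser session through the POST / route above (not part of the commit). The cookie objects are assumed to follow Playwright's { name, value, domain, path } shape, since the scraper manager later in this commit passes them straight to context.addCookies():

const token = 'REPLACE_WITH_JWT';   // placeholder token from /api/auth/login
await fetch('http://localhost:3001/api/sessions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
  body: JSON.stringify({
    platform: 'twitter',
    session_name: 'research session',   // hypothetical label
    cookies: [
      // hypothetical cookie exported from a logged-in browser profile
      { name: 'auth_token', value: 'REPLACE_WITH_COOKIE_VALUE', domain: '.x.com', path: '/' },
    ],
    user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  }),
});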
backend/src/routes/targets.ts (new file, 199 lines)
@@ -0,0 +1,199 @@
import { Router, Request, Response } from 'express';
import { Target } from '../models/Target.js';
import { logger } from '../utils/logger.js';

export const targetsRouter = Router();

// Get all targets
targetsRouter.get('/', async (req: Request, res: Response) => {
  try {
    const targets = await Target.find()
      .sort({ updatedAt: -1 })
      .lean();

    // Add profile count
    const targetsWithCount = targets.map(t => ({
      ...t,
      id: t._id,
      profile_count: t.profiles?.length || 0,
    }));

    res.json(targetsWithCount);
  } catch (error) {
    logger.error('Error fetching targets:', error);
    res.status(500).json({ error: 'Failed to fetch targets' });
  }
});

// Get single target with profiles
targetsRouter.get('/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const target = await Target.findById(id).lean();

    if (!target) {
      res.status(404).json({ error: 'Target not found' });
      return;
    }

    res.json({
      ...target,
      id: target._id,
      profiles: target.profiles?.map(p => ({
        ...p,
        id: p._id,
        target_id: target._id,
        profile_url: p.profileUrl,
        profile_data: p.profileData ? JSON.stringify(p.profileData) : null,
        last_scraped: p.lastScraped,
        created_at: p.createdAt,
      })) || [],
    });
  } catch (error) {
    logger.error('Error fetching target:', error);
    res.status(500).json({ error: 'Failed to fetch target' });
  }
});

// Create target
targetsRouter.post('/', async (req: Request, res: Response) => {
  try {
    const { name, notes } = req.body;

    if (!name) {
      res.status(400).json({ error: 'Name is required' });
      return;
    }

    const target = new Target({ name, notes, profiles: [] });
    await target.save();

    logger.info(`Created target: ${name} (${target._id})`);
    res.status(201).json({
      ...target.toObject(),
      id: target._id,
      profile_count: 0,
    });
  } catch (error) {
    logger.error('Error creating target:', error);
    res.status(500).json({ error: 'Failed to create target' });
  }
});

// Update target
targetsRouter.put('/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { name, notes } = req.body;

    const target = await Target.findByIdAndUpdate(
      id,
      { name, notes },
      { new: true }
    ).lean();

    if (!target) {
      res.status(404).json({ error: 'Target not found' });
      return;
    }

    res.json({ ...target, id: target._id });
  } catch (error) {
    logger.error('Error updating target:', error);
    res.status(500).json({ error: 'Failed to update target' });
  }
});

// Delete target
targetsRouter.delete('/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const result = await Target.findByIdAndDelete(id);

    if (!result) {
      res.status(404).json({ error: 'Target not found' });
      return;
    }

    logger.info(`Deleted target: ${id}`);
    res.json({ success: true });
  } catch (error) {
    logger.error('Error deleting target:', error);
    res.status(500).json({ error: 'Failed to delete target' });
  }
});

// Add profile to target
targetsRouter.post('/:id/profiles', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { platform, username, profile_url } = req.body;

    if (!platform) {
      res.status(400).json({ error: 'Platform is required' });
      return;
    }

    const target = await Target.findById(id);
    if (!target) {
      res.status(404).json({ error: 'Target not found' });
      return;
    }

    const newProfile = {
      platform,
      username: username || undefined,
      profileUrl: profile_url || undefined,
      createdAt: new Date(),
    };

    target.profiles.push(newProfile as any);
    await target.save();

    const addedProfile = target.profiles[target.profiles.length - 1];

    logger.info(`Added ${platform} profile to target ${id}`);
    res.status(201).json({
      id: addedProfile._id,
      target_id: id,
      platform: addedProfile.platform,
      username: addedProfile.username,
      profile_url: addedProfile.profileUrl,
      created_at: addedProfile.createdAt,
    });
  } catch (error) {
    logger.error('Error adding profile:', error);
    res.status(500).json({ error: 'Failed to add profile' });
  }
});

// Delete profile
targetsRouter.delete('/:id/profiles/:profileId', async (req: Request, res: Response) => {
  try {
    const { id, profileId } = req.params;

    const target = await Target.findById(id);
    if (!target) {
      res.status(404).json({ error: 'Target not found' });
      return;
    }

    const profileIndex = target.profiles.findIndex(
      p => p._id.toString() === profileId
    );

    if (profileIndex === -1) {
      res.status(404).json({ error: 'Profile not found' });
      return;
    }

    target.profiles.splice(profileIndex, 1);
    await target.save();

    logger.info(`Deleted profile ${profileId} from target ${id}`);
    res.json({ success: true });
  } catch (error) {
    logger.error('Error deleting profile:', error);
    res.status(500).json({ error: 'Failed to delete profile' });
  }
});
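A short usage sketch for the two write routes above, creating a target and then attaching a profile to it (not part of the commit; assumes the same Bearer token as the other examples):

const token = 'REPLACE_WITH_JWT';   // placeholder token from /api/auth/login
const headers = { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` };

// Create a target, then add a social profile under it.
const target = await (await fetch('http://localhost:3001/api/targets', {
  method: 'POST', headers, body: JSON.stringify({ name: 'Example Person', notes: 'demo' }),
})).json();

await fetch(`http://localhost:3001/api/targets/${target.id}/profiles`, {
  method: 'POST', headers, body: JSON.stringify({ platform: 'twitter', username: 'example' }),
});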
backend/src/scraper/manager.ts (new file, 509 lines)
@@ -0,0 +1,509 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { Server } from 'socket.io';
import { chromium, Browser, BrowserContext, Page } from 'playwright';
import mongoose from 'mongoose';
import { ScraperJob } from '../models/ScraperJob.js';
import { Session } from '../models/Session.js';
import { Target } from '../models/Target.js';
import { decrypt } from '../utils/encryption.js';
import { logger } from '../utils/logger.js';

declare const document: any;
declare const window: any;

export interface ScraperJobConfig {
  jobId: string;
  platform: string;
  profileUrl?: string;
  targetId?: string;
  profileId?: string;
}

interface ActiveJob {
  config: ScraperJobConfig;
  browser?: Browser;
  context?: BrowserContext;
  page?: Page;
  abortController: AbortController;
}

export class ScraperManager {
  private io: Server;
  private activeJobs: Map<string, ActiveJob> = new Map();
  private browser: Browser | null = null;

  constructor(io: Server) {
    this.io = io;
  }

  private async log(jobId: string, level: string, message: string): Promise<void> {
    await ScraperJob.findByIdAndUpdate(jobId, {
      $push: {
        logs: {
          level,
          message,
          timestamp: new Date(),
        }
      }
    });

    // Emit to socket
    this.io.to(`scraper:${jobId}`).emit('scraper:log', {
      jobId,
      level,
      message,
      timestamp: new Date().toISOString(),
    });

    logger[level as 'info' | 'warn' | 'error'](`[Job ${jobId}] ${message}`);
  }

  private async updateJobStatus(
    jobId: string,
    status: string,
    progress?: number,
    result?: any,
    error?: string
  ): Promise<void> {
    const update: any = { status };

    if (progress !== undefined) update.progress = progress;
    if (result !== undefined) update.result = result;
    if (error !== undefined) update.error = error;
    if (status === 'running') update.startedAt = new Date();
    if (status === 'completed' || status === 'failed' || status === 'cancelled') {
      update.completedAt = new Date();
    }

    await ScraperJob.findByIdAndUpdate(jobId, update);

    // Emit status update
    this.io.to(`scraper:${jobId}`).emit('scraper:status', {
      jobId,
      status,
      progress,
      result: result ? result : undefined,
      error,
    });

    // Also emit to general channel
    this.io.emit('scraper:jobUpdate', { jobId, status, progress });
  }

  async startJob(config: ScraperJobConfig): Promise<void> {
    const abortController = new AbortController();

    const activeJob: ActiveJob = {
      config,
      abortController,
    };

    this.activeJobs.set(config.jobId, activeJob);

    // Run asynchronously
    this.runJob(config, abortController.signal).catch((error) => {
      logger.error(`Job ${config.jobId} failed:`, error);
    });
  }

  private async runJob(config: ScraperJobConfig, signal: AbortSignal): Promise<void> {
    const { jobId, platform, profileUrl } = config;

    try {
      await this.updateJobStatus(jobId, 'running', 0);
      await this.log(jobId, 'info', `Starting scrape for platform: ${platform}`);

      // Load session from vault
      const session = await this.loadSession(platform);

      if (!session) {
        throw new Error(`No session found for platform: ${platform}`);
      }

      await this.log(jobId, 'info', `Loaded session: ${session.sessionName}`);
      await this.updateJobStatus(jobId, 'running', 10);

      // Initialize browser
      await this.log(jobId, 'info', 'Initializing browser...');
      const browser = await this.getBrowser();

      if (signal.aborted) {
        throw new Error('Job cancelled');
      }

      // Create context with session
      const context = await browser.newContext({
        userAgent: session.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        viewport: { width: 1920, height: 1080 },
        locale: 'en-US',
      });

      // Add cookies
      if (session.cookies && Array.isArray(session.cookies)) {
        await context.addCookies(session.cookies);
        await this.log(jobId, 'info', `Loaded ${session.cookies.length} cookies`);
      }

      await this.updateJobStatus(jobId, 'running', 20);

      const page = await context.newPage();

      // Store references
      const activeJob = this.activeJobs.get(jobId);
      if (activeJob) {
        activeJob.context = context;
        activeJob.page = page;
      }

      if (signal.aborted) {
        await context.close();
        throw new Error('Job cancelled');
      }

      // Run platform-specific scraper
      let result: any;

      switch (platform.toLowerCase()) {
        case 'x':
        case 'twitter':
          result = await this.scrapeTwitter(jobId, page, profileUrl, signal);
          break;
        case 'instagram':
          result = await this.scrapeInstagram(jobId, page, profileUrl, signal);
          break;
        case 'linkedin':
          result = await this.scrapeLinkedIn(jobId, page, profileUrl, signal);
          break;
        case 'facebook':
          result = await this.scrapeFacebook(jobId, page, profileUrl, signal);
          break;
        default:
          result = await this.scrapeGeneric(jobId, page, profileUrl || '', signal);
      }

      // Close context
      await context.close();

      // Save result to profile if profile_id provided
      if (config.profileId && config.targetId && result) {
        await Target.updateOne(
          { _id: new mongoose.Types.ObjectId(config.targetId), 'profiles._id': new mongoose.Types.ObjectId(config.profileId) },
          {
            $set: {
              'profiles.$.profileData': result,
              'profiles.$.lastScraped': new Date()
            }
          }
        );
      }

      await this.updateJobStatus(jobId, 'completed', 100, result);
      await this.log(jobId, 'info', 'Scrape completed successfully');

    } catch (error: any) {
      const errorMessage = error.message || 'Unknown error';
      await this.log(jobId, 'error', `Scrape failed: ${errorMessage}`);
      await this.updateJobStatus(jobId, 'failed', undefined, undefined, errorMessage);
    } finally {
      this.activeJobs.delete(jobId);
    }
  }

  private async loadSession(platform: string): Promise<any> {
    const session = await Session.findOne({ platform, status: 'active' }).lean();

    if (!session) {
      return null;
    }

    return {
      ...session,
      cookies: JSON.parse(decrypt(session.cookiesEncrypted)),
      localStorage: session.localStorageEncrypted
        ? JSON.parse(decrypt(session.localStorageEncrypted))
        : null,
    };
  }

  private async getBrowser(): Promise<Browser> {
    if (!this.browser || !this.browser.isConnected()) {
      this.browser = await chromium.launch({
        headless: true,
        args: [
          '--disable-blink-features=AutomationControlled',
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-web-security',
          '--disable-features=IsolateOrigins,site-per-process',
        ],
      });
    }
    return this.browser;
  }

  // Platform-specific scrapers
  private async scrapeTwitter(
    jobId: string,
    page: Page,
    profileUrl: string | undefined,
    signal: AbortSignal
  ): Promise<any> {
    const url = profileUrl || 'https://x.com/home';

    await this.log(jobId, 'info', `Navigating to: ${url}`);
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
    await this.updateJobStatus(jobId, 'running', 40);

    if (signal.aborted) throw new Error('Job cancelled');

    // Wait for content to load
    await page.waitForTimeout(2000);

    await this.log(jobId, 'info', 'Extracting profile data...');
    await this.updateJobStatus(jobId, 'running', 60);

    // Extract profile data
    const profileData = await page.evaluate(() => {
      const doc = document as any;
      const win = window as any;
      const data: any = {
        url: win.location.href,
        scraped_at: new Date().toISOString(),
      };

      // Try to extract profile info
      const nameElement = doc.querySelector('[data-testid="UserName"]');
      if (nameElement) {
        data.display_name = nameElement.querySelector('span')?.textContent;
        data.username = nameElement.querySelectorAll('span')[1]?.textContent;
      }

      const bioElement = doc.querySelector('[data-testid="UserDescription"]');
      if (bioElement) {
        data.bio = bioElement.textContent;
      }

      // Extract stats
      const statsElements = doc.querySelectorAll('[href*="/following"], [href*="/followers"]');
      statsElements.forEach((el: any) => {
        const href = el.getAttribute('href');
        const text = el.textContent;
        if (href?.includes('following')) {
          data.following = text;
        } else if (href?.includes('followers')) {
          data.followers = text;
        }
      });

      // Get recent tweets
      const tweets: any[] = [];
      doc.querySelectorAll('[data-testid="tweet"]').forEach((tweet: any, i: number) => {
        if (i < 10) { // Limit to 10 tweets
          tweets.push({
            text: tweet.querySelector('[data-testid="tweetText"]')?.textContent,
            timestamp: tweet.querySelector('time')?.getAttribute('datetime'),
          });
        }
      });
      data.recent_tweets = tweets;

      return data;
    });

    await this.updateJobStatus(jobId, 'running', 80);
    await this.log(jobId, 'info', `Extracted profile: ${profileData.username || 'unknown'}`);

    return profileData;
  }

  private async scrapeInstagram(
    jobId: string,
    page: Page,
    profileUrl: string | undefined,
    signal: AbortSignal
  ): Promise<any> {
    const url = profileUrl || 'https://instagram.com';

    await this.log(jobId, 'info', `Navigating to: ${url}`);
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
    await this.updateJobStatus(jobId, 'running', 40);

    if (signal.aborted) throw new Error('Job cancelled');

    await page.waitForTimeout(2000);

    await this.log(jobId, 'info', 'Extracting profile data...');
    await this.updateJobStatus(jobId, 'running', 60);

    const profileData = await page.evaluate(() => {
      const doc = document as any;
      const win = window as any;
      const data: any = {
        url: win.location.href,
        scraped_at: new Date().toISOString(),
      };

      // Extract from meta tags and visible elements
      const ogTitle = doc.querySelector('meta[property="og:title"]');
      if (ogTitle) {
        data.title = ogTitle.getAttribute('content');
      }

      const ogDescription = doc.querySelector('meta[property="og:description"]');
      if (ogDescription) {
        data.description = ogDescription.getAttribute('content');
      }

      // Try to parse stats from description
      const statsMatch = data.description?.match(/(\d+(?:,\d+)*(?:\.\d+)?[KMB]?)\s+Followers/i);
      if (statsMatch) {
        data.followers = statsMatch[1];
      }

      // Get profile picture
      const profilePic = doc.querySelector('img[alt*="profile picture"]');
      if (profilePic) {
        data.profile_picture = profilePic.getAttribute('src');
      }

      return data;
    });

    await this.updateJobStatus(jobId, 'running', 80);

    return profileData;
  }

  private async scrapeLinkedIn(
    jobId: string,
    page: Page,
    profileUrl: string | undefined,
    signal: AbortSignal
  ): Promise<any> {
    const url = profileUrl || 'https://linkedin.com';

    await this.log(jobId, 'info', `Navigating to: ${url}`);
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
    await this.updateJobStatus(jobId, 'running', 40);

    if (signal.aborted) throw new Error('Job cancelled');

    await page.waitForTimeout(2000);
    await this.updateJobStatus(jobId, 'running', 60);

    const profileData = await page.evaluate(() => {
      const doc = document as any;
      const win = window as any;
      const data: any = {
        url: win.location.href,
        scraped_at: new Date().toISOString(),
      };

      // Extract profile info
      const nameElement = doc.querySelector('h1');
      if (nameElement) {
        data.name = nameElement.textContent?.trim();
      }

      const headlineElement = doc.querySelector('.text-body-medium');
      if (headlineElement) {
        data.headline = headlineElement.textContent?.trim();
      }

      return data;
    });

    await this.updateJobStatus(jobId, 'running', 80);

    return profileData;
  }

  private async scrapeFacebook(
    jobId: string,
    page: Page,
    profileUrl: string | undefined,
    signal: AbortSignal
  ): Promise<any> {
    const url = profileUrl || 'https://facebook.com';

    await this.log(jobId, 'info', `Navigating to: ${url}`);
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
    await this.updateJobStatus(jobId, 'running', 40);

    if (signal.aborted) throw new Error('Job cancelled');

    await page.waitForTimeout(2000);
    await this.updateJobStatus(jobId, 'running', 60);

    const profileData = await page.evaluate(() => {
      const doc = document as any;
      const win = window as any;
      return {
        url: win.location.href,
        scraped_at: new Date().toISOString(),
        title: doc.title,
      };
    });

    await this.updateJobStatus(jobId, 'running', 80);

    return profileData;
  }

  private async scrapeGeneric(
    jobId: string,
    page: Page,
    url: string,
    signal: AbortSignal
  ): Promise<any> {
    await this.log(jobId, 'info', `Navigating to: ${url}`);
    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
    await this.updateJobStatus(jobId, 'running', 50);

    if (signal.aborted) throw new Error('Job cancelled');

    const data = await page.evaluate(() => {
      const doc = document as any;
      const win = window as any;
      return {
        url: win.location.href,
        title: doc.title,
        scraped_at: new Date().toISOString(),
        text_content: doc.body.innerText.substring(0, 5000),
      };
    });

    await this.updateJobStatus(jobId, 'running', 80);

    return data;
  }

  async cancelJob(jobId: string): Promise<void> {
    const activeJob = this.activeJobs.get(jobId);

    if (activeJob) {
      activeJob.abortController.abort();

      if (activeJob.context) {
        await activeJob.context.close().catch(() => {});
      }

      await this.updateJobStatus(jobId, 'cancelled');
      await this.log(jobId, 'info', 'Job cancelled by user');
      this.activeJobs.delete(jobId);
    }
  }

  async shutdown(): Promise<void> {
    // Cancel all active jobs
    for (const [jobId] of this.activeJobs) {
      await this.cancelJob(jobId);
    }

    // Close browser
    if (this.browser) {
      await this.browser.close();
      this.browser = null;
    }
  }
}
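ScraperManager.shutdown() is defined above but nothing in src/index.ts calls it yet. A sketch of how it might be wired into a signal handler (an assumption about intended use, not part of the commit; closeDatabase would need to be imported from './database/index.js'):

// In src/index.ts (sketch): stop active jobs, close the shared browser and MongoDB, then exit.
process.on('SIGINT', async () => {
  await scraperManager.shutdown();
  await closeDatabase();
  httpServer.close(() => process.exit(0));
});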
backend/src/utils/encryption.ts (new file, 44 lines)
@@ -0,0 +1,44 @@
import crypto from 'crypto';

const ALGORITHM = 'aes-256-gcm';
const IV_LENGTH = 16;
const AUTH_TAG_LENGTH = 16;

function getEncryptionKey(): Buffer {
  const key = process.env.VAULT_ENCRYPTION_KEY;
  if (!key || key.length !== 64) {
    throw new Error('VAULT_ENCRYPTION_KEY must be a 64-character hex string');
  }
  return Buffer.from(key, 'hex');
}

export function encrypt(text: string): string {
  const iv = crypto.randomBytes(IV_LENGTH);
  const cipher = crypto.createCipheriv(ALGORITHM, getEncryptionKey(), iv);

  let encrypted = cipher.update(text, 'utf8', 'hex');
  encrypted += cipher.final('hex');

  const authTag = cipher.getAuthTag();

  // Combine IV + AuthTag + Encrypted data
  return iv.toString('hex') + authTag.toString('hex') + encrypted;
}

export function decrypt(encryptedData: string): string {
  const iv = Buffer.from(encryptedData.slice(0, IV_LENGTH * 2), 'hex');
  const authTag = Buffer.from(encryptedData.slice(IV_LENGTH * 2, IV_LENGTH * 2 + AUTH_TAG_LENGTH * 2), 'hex');
  const encrypted = encryptedData.slice(IV_LENGTH * 2 + AUTH_TAG_LENGTH * 2);

  const decipher = crypto.createDecipheriv(ALGORITHM, getEncryptionKey(), iv);
  decipher.setAuthTag(authTag);

  let decrypted = decipher.update(encrypted, 'hex', 'utf8');
  decrypted += decipher.final('utf8');

  return decrypted;
}

export function hashPassword(password: string): string {
  return crypto.createHash('sha256').update(password).digest('hex');
}
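getEncryptionKey() above insists on a 64-character hex string, i.e. 32 random bytes for AES-256. One way to generate a suitable VAULT_ENCRYPTION_KEY, as a small sketch:

import crypto from 'crypto';

// Prints a 64-character hex string; paste the output into the .env file as VAULT_ENCRYPTION_KEY.
console.log(crypto.randomBytes(32).toString('hex'));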
backend/src/utils/logger.ts (new file, 33 lines)
@@ -0,0 +1,33 @@
import winston from 'winston';

const logFormat = winston.format.combine(
  winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
  winston.format.errors({ stack: true }),
  winston.format.printf(({ level, message, timestamp, stack }) => {
    return `${timestamp} [${level.toUpperCase()}]: ${stack || message}`;
  })
);

export const logger = winston.createLogger({
  level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',
  format: logFormat,
  transports: [
    new winston.transports.Console({
      format: winston.format.combine(
        winston.format.colorize(),
        logFormat
      ),
    }),
    new winston.transports.File({
      filename: 'logs/error.log',
      level: 'error',
      maxsize: 5242880, // 5MB
      maxFiles: 5,
    }),
    new winston.transports.File({
      filename: 'logs/combined.log',
      maxsize: 5242880,
      maxFiles: 5,
    }),
  ],
});
backend/tsconfig.json (new file, 20 lines)
@@ -0,0 +1,20 @@
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "NodeNext",
    "moduleResolution": "NodeNext",
    "lib": ["ES2022"],
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}