first commit

2026-01-25 23:05:41 +02:00
commit dec7844b49
48 changed files with 10815 additions and 0 deletions

3114
backend/package-lock.json generated Normal file

File diff suppressed because it is too large

40
backend/package.json Normal file

@@ -0,0 +1,40 @@
{
"name": "osint-backend",
"version": "1.0.0",
"description": "OSINT Platform Backend",
"type": "module",
"main": "dist/index.js",
"scripts": {
"dev": "tsx watch src/index.ts",
"build": "tsc",
"start": "node dist/index.js"
},
"dependencies": {
"express": "^4.18.2",
"cors": "^2.8.5",
"helmet": "^7.1.0",
"bcryptjs": "^2.4.3",
"jsonwebtoken": "^9.0.2",
"cookie-parser": "^1.4.6",
"mongoose": "^8.2.1",
"playwright": "^1.42.1",
"playwright-extra": "^4.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"dotenv": "^16.4.5",
"uuid": "^9.0.1",
"winston": "^3.11.0",
"express-rate-limit": "^7.2.0",
"socket.io": "^4.7.5"
},
"devDependencies": {
"@types/express": "^4.17.21",
"@types/cors": "^2.8.17",
"@types/bcryptjs": "^2.4.6",
"@types/jsonwebtoken": "^9.0.6",
"@types/cookie-parser": "^1.4.7",
"@types/uuid": "^9.0.8",
"@types/node": "^20.11.24",
"typescript": "^5.4.2",
"tsx": "^4.7.1"
}
}

35
backend/src/database/index.ts Normal file

@@ -0,0 +1,35 @@
import mongoose from 'mongoose';
import { logger } from '../utils/logger.js';
export async function connectDatabase(): Promise<void> {
const mongoUri = process.env.MONGODB_URI;
if (!mongoUri) {
throw new Error('MONGODB_URI environment variable is required');
}
try {
await mongoose.connect(mongoUri, {
dbName: 'osint_platform',
});
logger.info('Connected to MongoDB');
mongoose.connection.on('error', (err) => {
logger.error('MongoDB connection error:', err);
});
mongoose.connection.on('disconnected', () => {
logger.warn('MongoDB disconnected');
});
} catch (error) {
logger.error('Failed to connect to MongoDB:', error);
throw error;
}
}
export async function closeDatabase(): Promise<void> {
await mongoose.connection.close();
logger.info('MongoDB connection closed');
}
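
closeDatabase is exported but nothing in this commit calls it. A minimal sketch of how a shutdown hook might wire it up (the placement in index.ts is an assumption):

import { closeDatabase } from './database/index.js';

process.on('SIGINT', async () => {
  await closeDatabase(); // flush the Mongoose connection before exiting
  process.exit(0);
});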

131
backend/src/index.ts Normal file

@@ -0,0 +1,131 @@
import express from 'express';
import cors from 'cors';
import helmet from 'helmet';
import cookieParser from 'cookie-parser';
import { createServer } from 'http';
import { Server } from 'socket.io';
import dotenv from 'dotenv';
import path from 'path';
import { fileURLToPath } from 'url';
import { authRouter } from './routes/auth.js';
import { targetsRouter } from './routes/targets.js';
import { sessionsRouter } from './routes/sessions.js';
import { scraperRouter } from './routes/scraper.js';
import { authMiddleware } from './middleware/auth.js';
import { connectDatabase } from './database/index.js';
import { logger } from './utils/logger.js';
import { ScraperManager } from './scraper/manager.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Note: with ESM, the imports above are evaluated before this line runs, so any
// module that reads process.env at import time (e.g. the logger's level) will not
// see values that only exist in .env.
dotenv.config({ path: path.join(__dirname, '../../.env') });
const app = express();
const httpServer = createServer(app);
const io = new Server(httpServer, {
cors: {
origin: process.env.FRONTEND_URL || 'http://localhost:5173',
credentials: true,
},
});
const PORT = Number(process.env.PORT) || 3001;
// Initialize scraper manager with socket.io
const scraperManager = new ScraperManager(io);
app.set('scraperManager', scraperManager);
app.set('io', io);
// Middleware
app.use(helmet({
contentSecurityPolicy: false,
}));
app.use(cors({
origin: process.env.FRONTEND_URL || 'http://localhost:5173',
credentials: true,
}));
app.use(express.json());
app.use(cookieParser());
// Serve static files from frontend build in production
if (process.env.NODE_ENV === 'production') {
app.use(express.static(path.join(__dirname, '../../frontend/dist')));
}
// API Routes
app.use('/api/auth', authRouter);
app.use('/api/targets', authMiddleware, targetsRouter);
app.use('/api/sessions', authMiddleware, sessionsRouter);
app.use('/api/scraper', authMiddleware, scraperRouter);
// Health check
app.get('/api/health', (req, res) => {
res.json({ status: 'ok', timestamp: new Date().toISOString() });
});
// Catch-all for SPA routing in production
if (process.env.NODE_ENV === 'production') {
app.get('*', (req, res) => {
res.sendFile(path.join(__dirname, '../../frontend/dist/index.html'));
});
}
// Socket.io authentication
io.use((socket, next) => {
const token = socket.handshake.auth.token;
if (!token) {
return next(new Error('Authentication required'));
}
// Token validation would go here
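// A minimal sketch of what that validation could look like, assuming the socket
// token reuses the HTTP JWT_SECRET (jwt is not imported in this file, so the
// sketch stays commented to keep this commit compiling as-is):
//   try {
//     jwt.verify(token, process.env.JWT_SECRET as string);
//   } catch {
//     return next(new Error('Invalid token'));
//   }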
next();
});
io.on('connection', (socket) => {
logger.info(`Client connected: ${socket.id}`);
socket.on('subscribe:scraper', (scraperId: string) => {
socket.join(`scraper:${scraperId}`);
});
socket.on('unsubscribe:scraper', (scraperId: string) => {
socket.leave(`scraper:${scraperId}`);
});
socket.on('disconnect', () => {
logger.info(`Client disconnected: ${socket.id}`);
});
});
// Global error handlers
process.on('unhandledRejection', (reason, promise) => {
logger.error('Unhandled Rejection at:', promise, 'reason:', reason);
});
process.on('uncaughtException', (error) => {
logger.error('Uncaught Exception:', error);
// Important: give the logger time to write before exiting
setTimeout(() => {
process.exit(1);
}, 1000);
});
// Start server with database connection
async function start() {
try {
await connectDatabase();
httpServer.listen(PORT, () => {
logger.info(`🔒 OSINT Platform Backend running on port ${PORT}`);
logger.info(`📊 Environment: ${process.env.NODE_ENV || 'development'}`);
});
} catch (error) {
logger.error('Failed to start server:', error);
process.exit(1);
}
}
start();
export { app, io };
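
For reference, a hypothetical frontend consumer of the socket channels above; socket.io-client and the placeholder values are assumptions, but the event names match the handlers in this file:

import { io } from 'socket.io-client';

const savedJwt = '...'; // hypothetical: token returned by POST /api/auth/login
const jobId = '...';    // hypothetical: id returned by POST /api/scraper/start

const socket = io('http://localhost:3001', { auth: { token: savedJwt } });
socket.emit('subscribe:scraper', jobId);
socket.on('scraper:log', (e) => console.log(`[${e.level}] ${e.message}`));
socket.on('scraper:status', (e) => console.log(e.status, e.progress));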

55
backend/src/middleware/auth.ts Normal file

@@ -0,0 +1,55 @@
import { Request, Response, NextFunction } from 'express';
import jwt from 'jsonwebtoken';
import { logger } from '../utils/logger.js';
export interface AuthRequest extends Request {
userId?: string;
}
export function authMiddleware(req: AuthRequest, res: Response, next: NextFunction): void {
try {
// Check for token in Authorization header or cookie
const authHeader = req.headers.authorization;
const cookieToken = req.cookies?.auth_token;
let token: string | undefined;
if (authHeader && authHeader.startsWith('Bearer ')) {
token = authHeader.substring(7);
} else if (cookieToken) {
token = cookieToken;
}
if (!token) {
res.status(401).json({ error: 'Authentication required' });
return;
}
const jwtSecret = process.env.JWT_SECRET;
if (!jwtSecret) {
logger.error('JWT_SECRET not configured');
res.status(500).json({ error: 'Server configuration error' });
return;
}
const decoded = jwt.verify(token, jwtSecret) as { authenticated: boolean };
if (!decoded.authenticated) {
res.status(401).json({ error: 'Invalid token' });
return;
}
next();
} catch (error) {
if (error instanceof jwt.TokenExpiredError) {
res.status(401).json({ error: 'Token expired' });
return;
}
if (error instanceof jwt.JsonWebTokenError) {
res.status(401).json({ error: 'Invalid token' });
return;
}
logger.error('Auth middleware error:', error);
res.status(500).json({ error: 'Authentication error' });
}
}

52
backend/src/models/ScraperJob.ts Normal file

@@ -0,0 +1,52 @@
import mongoose, { Document, Schema } from 'mongoose';
export interface IScraperLog {
level: string;
message: string;
timestamp: Date;
}
export interface IScraperJob extends Document {
targetId?: mongoose.Types.ObjectId;
profileId?: mongoose.Types.ObjectId;
platform: string;
status: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
progress: number;
result?: Record<string, any>;
error?: string;
logs: IScraperLog[];
startedAt?: Date;
completedAt?: Date;
createdAt: Date;
}
const ScraperLogSchema = new Schema<IScraperLog>({
level: { type: String, required: true },
message: { type: String, required: true },
timestamp: { type: Date, default: Date.now },
}, { _id: false });
const ScraperJobSchema = new Schema<IScraperJob>({
targetId: { type: Schema.Types.ObjectId, ref: 'Target' },
profileId: { type: Schema.Types.ObjectId },
platform: { type: String, required: true },
status: {
type: String,
enum: ['pending', 'running', 'completed', 'failed', 'cancelled'],
default: 'pending'
},
progress: { type: Number, default: 0 },
result: { type: Schema.Types.Mixed },
error: { type: String },
logs: [ScraperLogSchema],
startedAt: { type: Date },
completedAt: { type: Date },
}, {
timestamps: true,
});
// Index for efficient querying
ScraperJobSchema.index({ status: 1, createdAt: -1 });
ScraperJobSchema.index({ targetId: 1 });
export const ScraperJob = mongoose.model<IScraperJob>('ScraperJob', ScraperJobSchema);

33
backend/src/models/Session.ts Normal file

@@ -0,0 +1,33 @@
import mongoose, { Document, Schema } from 'mongoose';
export interface ISession extends Document {
platform: string;
sessionName: string;
cookiesEncrypted: string;
localStorageEncrypted?: string;
userAgent?: string;
proxy?: string;
status: 'active' | 'expired' | 'invalid';
lastValidated?: Date;
createdAt: Date;
updatedAt: Date;
}
const SessionSchema = new Schema<ISession>({
platform: { type: String, required: true, unique: true },
sessionName: { type: String, required: true },
cookiesEncrypted: { type: String, required: true },
localStorageEncrypted: { type: String },
userAgent: { type: String },
proxy: { type: String },
status: {
type: String,
enum: ['active', 'expired', 'invalid'],
default: 'active'
},
lastValidated: { type: Date },
}, {
timestamps: true,
});
export const Session = mongoose.model<ISession>('Session', SessionSchema);

47
backend/src/models/Target.ts Normal file

@@ -0,0 +1,47 @@
import mongoose, { Document, Schema } from 'mongoose';
export interface ISocialProfile {
_id: mongoose.Types.ObjectId;
platform: string;
username?: string;
profileUrl?: string;
profileData?: Record<string, any>;
lastScraped?: Date;
createdAt: Date;
}
export interface ITarget extends Document {
name: string;
notes?: string;
profiles: ISocialProfile[];
createdAt: Date;
updatedAt: Date;
}
const SocialProfileSchema = new Schema<ISocialProfile>({
platform: { type: String, required: true },
username: { type: String },
profileUrl: { type: String },
profileData: { type: Schema.Types.Mixed },
lastScraped: { type: Date },
createdAt: { type: Date, default: Date.now },
});
const TargetSchema = new Schema<ITarget>({
name: { type: String, required: true },
notes: { type: String },
profiles: [SocialProfileSchema],
}, {
timestamps: true,
});
// Virtual for profile count
TargetSchema.virtual('profileCount').get(function() {
return this.profiles?.length || 0;
});
// Ensure virtuals are serialized
TargetSchema.set('toJSON', { virtuals: true });
TargetSchema.set('toObject', { virtuals: true });
export const Target = mongoose.model<ITarget>('Target', TargetSchema);

3
backend/src/models/index.ts Normal file

@@ -0,0 +1,3 @@
export { Target, type ITarget, type ISocialProfile } from './Target.js';
export { Session, type ISession } from './Session.js';
export { ScraperJob, type IScraperJob, type IScraperLog } from './ScraperJob.js';

113
backend/src/routes/auth.ts Normal file

@@ -0,0 +1,113 @@
import { Router, Request, Response } from 'express';
import bcrypt from 'bcryptjs';
import jwt from 'jsonwebtoken';
import rateLimit from 'express-rate-limit';
import { logger } from '../utils/logger.js';
export const authRouter = Router();
// Rate limiting for auth endpoints
const authLimiter = rateLimit({
windowMs: 15 * 60 * 1000, // 15 minutes
max: 5, // 5 attempts per window
message: { error: 'Too many authentication attempts, please try again later' },
standardHeaders: true,
legacyHeaders: false,
});
// Login with master password
authRouter.post('/login', authLimiter, async (req: Request, res: Response) => {
try {
const { password } = req.body;
if (!password) {
res.status(400).json({ error: 'Password is required' });
return;
}
const masterPassword = process.env.MASTER_PASSWORD;
if (!masterPassword) {
logger.error('MASTER_PASSWORD not configured');
res.status(500).json({ error: 'Server configuration error' });
return;
}
// Plain comparison for now; in production you'd store a bcrypt hash and check it
// with bcrypt.compare (bcryptjs is already a dependency) instead of keeping the
// plaintext in the environment.
const isValid = password === masterPassword;
if (!isValid) {
logger.warn('Failed login attempt');
res.status(401).json({ error: 'Invalid password' });
return;
}
const jwtSecret = process.env.JWT_SECRET;
if (!jwtSecret) {
logger.error('JWT_SECRET not configured');
res.status(500).json({ error: 'Server configuration error' });
return;
}
const token = jwt.sign(
{ authenticated: true },
jwtSecret,
{ expiresIn: (process.env.SESSION_EXPIRY || '24h') as any }
);
// Set HTTP-only cookie
res.cookie('auth_token', token, {
httpOnly: true,
secure: process.env.NODE_ENV === 'production',
sameSite: 'strict',
maxAge: 24 * 60 * 60 * 1000, // 24 hours
});
logger.info('Successful login');
res.json({
success: true,
token,
expiresIn: process.env.SESSION_EXPIRY || '24h'
});
} catch (error) {
logger.error('Login error:', error);
res.status(500).json({ error: 'Authentication failed' });
}
});
// Verify token
authRouter.get('/verify', (req: Request, res: Response) => {
try {
const authHeader = req.headers.authorization;
const cookieToken = req.cookies?.auth_token;
let token: string | undefined;
if (authHeader && authHeader.startsWith('Bearer ')) {
token = authHeader.substring(7);
} else if (cookieToken) {
token = cookieToken;
}
if (!token) {
res.status(401).json({ authenticated: false });
return;
}
const jwtSecret = process.env.JWT_SECRET;
if (!jwtSecret) {
res.status(500).json({ error: 'Server configuration error' });
return;
}
jwt.verify(token, jwtSecret);
res.json({ authenticated: true });
} catch (error) {
res.status(401).json({ authenticated: false });
}
});
// Logout
authRouter.post('/logout', (req: Request, res: Response) => {
res.clearCookie('auth_token');
res.json({ success: true });
});
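
A hypothetical client-side login sketch; credentials: 'include' matters because the token is also delivered as an HTTP-only cookie:

const res = await fetch('http://localhost:3001/api/auth/login', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  credentials: 'include', // lets the browser store the auth_token cookie
  body: JSON.stringify({ password: 'the master password' }), // placeholder value
});
const { token } = await res.json(); // also usable as: Authorization: Bearer <token>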

199
backend/src/routes/scraper.ts Normal file

@@ -0,0 +1,199 @@
import { Router, Request, Response } from 'express';
import mongoose from 'mongoose';
import { ScraperJob } from '../models/ScraperJob.js';
import { Target } from '../models/Target.js';
import { logger } from '../utils/logger.js';
import { ScraperManager } from '../scraper/manager.js';
export const scraperRouter = Router();
// Get all jobs
scraperRouter.get('/jobs', async (req: Request, res: Response) => {
try {
const limit = parseInt(req.query.limit as string) || 50;
const jobs = await ScraperJob.find()
.sort({ createdAt: -1 })
.limit(limit)
.lean();
// Populate target names
const targetIds = [...new Set(jobs.filter(j => j.targetId).map(j => j.targetId!.toString()))];
const targets = await Target.find({ _id: { $in: targetIds } }).select('name').lean();
const targetMap = new Map(targets.map(t => [t._id.toString(), t.name]));
const formattedJobs = jobs.map(job => ({
id: job._id,
target_id: job.targetId,
profile_id: job.profileId,
platform: job.platform,
status: job.status,
progress: job.progress,
result: job.result ? JSON.stringify(job.result) : null,
error: job.error,
target_name: job.targetId ? targetMap.get(job.targetId.toString()) : null,
started_at: job.startedAt,
completed_at: job.completedAt,
created_at: job.createdAt,
}));
res.json(formattedJobs);
} catch (error) {
logger.error('Error fetching jobs:', error);
res.status(500).json({ error: 'Failed to fetch jobs' });
}
});
// Get job by ID with logs
scraperRouter.get('/jobs/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const job = await ScraperJob.findById(id).lean();
if (!job) {
res.status(404).json({ error: 'Job not found' });
return;
}
// Get target name
let targetName = null;
if (job.targetId) {
const target = await Target.findById(job.targetId).select('name').lean();
targetName = target?.name;
}
res.json({
id: job._id,
target_id: job.targetId,
profile_id: job.profileId,
platform: job.platform,
status: job.status,
progress: job.progress,
result: job.result,
error: job.error,
target_name: targetName,
started_at: job.startedAt,
completed_at: job.completedAt,
created_at: job.createdAt,
logs: job.logs,
});
} catch (error) {
logger.error('Error fetching job:', error);
res.status(500).json({ error: 'Failed to fetch job' });
}
});
// Start a new scrape job
scraperRouter.post('/start', async (req: Request, res: Response) => {
try {
const { target_id, profile_id, platform, profile_url } = req.body;
if (!platform) {
res.status(400).json({ error: 'Platform is required' });
return;
}
// Create job record
const job = new ScraperJob({
targetId: target_id ? new mongoose.Types.ObjectId(target_id) : undefined,
profileId: profile_id ? new mongoose.Types.ObjectId(profile_id) : undefined,
platform,
status: 'pending',
progress: 0,
logs: [],
});
await job.save();
// Get scraper manager and start job
const scraperManager = req.app.get('scraperManager') as ScraperManager;
scraperManager.startJob({
jobId: job._id.toString(),
platform,
profileUrl: profile_url,
targetId: target_id,
profileId: profile_id,
});
logger.info(`Started scraper job: ${job._id} for ${platform}`);
res.status(201).json({
id: job._id,
platform: job.platform,
status: job.status,
progress: job.progress,
created_at: job.createdAt,
});
} catch (error) {
logger.error('Error starting job:', error);
res.status(500).json({ error: 'Failed to start job' });
}
});
// Cancel a job
scraperRouter.post('/jobs/:id/cancel', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const scraperManager = req.app.get('scraperManager') as ScraperManager;
await scraperManager.cancelJob(id);
const job = await ScraperJob.findById(id).lean();
if (!job) {
res.status(404).json({ error: 'Job not found' });
return;
}
res.json({
id: job._id,
status: job.status,
});
} catch (error) {
logger.error('Error cancelling job:', error);
res.status(500).json({ error: 'Failed to cancel job' });
}
});
// Get active jobs summary
scraperRouter.get('/status', async (req: Request, res: Response) => {
try {
const [pending, running, completed, failed] = await Promise.all([
ScraperJob.countDocuments({ status: 'pending' }),
ScraperJob.countDocuments({ status: 'running' }),
ScraperJob.countDocuments({ status: 'completed' }),
ScraperJob.countDocuments({ status: 'failed' }),
]);
const activeJobs = await ScraperJob.find({ status: { $in: ['pending', 'running'] } })
.sort({ createdAt: -1 })
.select('_id platform status progress startedAt')
.lean();
res.json({
counts: { pending, running, completed, failed },
activeJobs: activeJobs.map(j => ({
id: j._id,
platform: j.platform,
status: j.status,
progress: j.progress,
started_at: j.startedAt,
})),
});
} catch (error) {
logger.error('Error fetching scraper status:', error);
res.status(500).json({ error: 'Failed to fetch status' });
}
});
// Get logs for a job
scraperRouter.get('/jobs/:id/logs', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const job = await ScraperJob.findById(id).select('logs').lean();
if (!job) {
res.status(404).json({ error: 'Job not found' });
return;
}
res.json(job.logs || []);
} catch (error) {
logger.error('Error fetching logs:', error);
res.status(500).json({ error: 'Failed to fetch logs' });
}
});

225
backend/src/routes/sessions.ts Normal file

@@ -0,0 +1,225 @@
import { Router, Request, Response } from 'express';
import { Session } from '../models/Session.js';
import { encrypt, decrypt } from '../utils/encryption.js';
import { logger } from '../utils/logger.js';
export const sessionsRouter = Router();
// Get all sessions (without sensitive data)
sessionsRouter.get('/', async (req: Request, res: Response) => {
try {
const sessions = await Session.find()
.select('-cookiesEncrypted -localStorageEncrypted')
.sort({ updatedAt: -1 })
.lean();
const formattedSessions = sessions.map(s => ({
id: s._id,
platform: s.platform,
session_name: s.sessionName,
user_agent: s.userAgent,
proxy: s.proxy,
status: s.status,
last_validated: s.lastValidated,
created_at: s.createdAt,
updated_at: s.updatedAt,
}));
res.json(formattedSessions);
} catch (error) {
logger.error('Error fetching sessions:', error);
res.status(500).json({ error: 'Failed to fetch sessions' });
}
});
// Get session by platform
sessionsRouter.get('/platform/:platform', async (req: Request, res: Response) => {
try {
const { platform } = req.params;
const session = await Session.findOne({ platform })
.select('-cookiesEncrypted -localStorageEncrypted')
.lean();
if (!session) {
res.status(404).json({ error: 'Session not found for platform' });
return;
}
res.json({
id: session._id,
platform: session.platform,
session_name: session.sessionName,
user_agent: session.userAgent,
proxy: session.proxy,
status: session.status,
last_validated: session.lastValidated,
created_at: session.createdAt,
updated_at: session.updatedAt,
});
} catch (error) {
logger.error('Error fetching session:', error);
res.status(500).json({ error: 'Failed to fetch session' });
}
});
// Create or update session
sessionsRouter.post('/', async (req: Request, res: Response) => {
try {
const { platform, session_name, cookies, local_storage, user_agent, proxy } = req.body;
if (!platform || !session_name || !cookies) {
res.status(400).json({ error: 'Platform, session_name, and cookies are required' });
return;
}
// Encrypt sensitive data
const cookiesEncrypted = encrypt(JSON.stringify(cookies));
const localStorageEncrypted = local_storage ? encrypt(JSON.stringify(local_storage)) : undefined;
// Upsert session
const session = await Session.findOneAndUpdate(
{ platform },
{
platform,
sessionName: session_name,
cookiesEncrypted,
localStorageEncrypted,
userAgent: user_agent,
proxy,
status: 'active',
},
{ upsert: true, new: true }
).lean();
logger.info(`Saved session for ${platform}`);
res.status(201).json({
id: session._id,
platform: session.platform,
session_name: session.sessionName,
user_agent: session.userAgent,
proxy: session.proxy,
status: session.status,
created_at: session.createdAt,
updated_at: session.updatedAt,
});
} catch (error) {
logger.error('Error saving session:', error);
res.status(500).json({ error: 'Failed to save session' });
}
});
// Get decrypted session data (for internal scraper use)
sessionsRouter.get('/decrypt/:platform', async (req: Request, res: Response) => {
try {
const { platform } = req.params;
const session = await Session.findOne({ platform }).lean();
if (!session) {
res.status(404).json({ error: 'Session not found' });
return;
}
const cookies = JSON.parse(decrypt(session.cookiesEncrypted));
const localStorage = session.localStorageEncrypted
? JSON.parse(decrypt(session.localStorageEncrypted))
: null;
res.json({
id: session._id,
platform: session.platform,
session_name: session.sessionName,
cookies,
localStorage,
user_agent: session.userAgent,
proxy: session.proxy,
status: session.status,
});
} catch (error) {
logger.error('Error decrypting session:', error);
res.status(500).json({ error: 'Failed to decrypt session' });
}
});
// Validate session
sessionsRouter.post('/validate/:platform', async (req: Request, res: Response) => {
try {
const { platform } = req.params;
const session = await Session.findOneAndUpdate(
{ platform },
{ lastValidated: new Date() },
{ new: true }
).select('-cookiesEncrypted -localStorageEncrypted').lean();
if (!session) {
res.status(404).json({ error: 'Session not found' });
return;
}
res.json({
id: session._id,
platform: session.platform,
session_name: session.sessionName,
status: session.status,
last_validated: session.lastValidated,
});
} catch (error) {
logger.error('Error validating session:', error);
res.status(500).json({ error: 'Failed to validate session' });
}
});
// Update session status
sessionsRouter.patch('/:id/status', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { status } = req.body;
if (!status || !['active', 'expired', 'invalid'].includes(status)) {
res.status(400).json({ error: 'Valid status required (active, expired, invalid)' });
return;
}
const session = await Session.findByIdAndUpdate(
id,
{ status },
{ new: true }
).select('-cookiesEncrypted -localStorageEncrypted').lean();
if (!session) {
res.status(404).json({ error: 'Session not found' });
return;
}
res.json({
id: session._id,
platform: session.platform,
session_name: session.sessionName,
status: session.status,
updated_at: session.updatedAt,
});
} catch (error) {
logger.error('Error updating session status:', error);
res.status(500).json({ error: 'Failed to update session status' });
}
});
// Delete session
sessionsRouter.delete('/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const result = await Session.findByIdAndDelete(id);
if (!result) {
res.status(404).json({ error: 'Session not found' });
return;
}
logger.info(`Deleted session: ${id}`);
res.json({ success: true });
} catch (error) {
logger.error('Error deleting session:', error);
res.status(500).json({ error: 'Failed to delete session' });
}
});
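
A hypothetical request sketch for saving a captured session. The cookie objects follow Playwright's shape, since the scraper later passes them straight to context.addCookies; the token and values are placeholders:

const token = '...'; // hypothetical: JWT from POST /api/auth/login

await fetch('http://localhost:3001/api/sessions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
  body: JSON.stringify({
    platform: 'x',
    session_name: 'main',
    cookies: [{ name: 'auth_token', value: '...', domain: '.x.com', path: '/' }],
    user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
  }),
});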

199
backend/src/routes/targets.ts Normal file

@@ -0,0 +1,199 @@
import { Router, Request, Response } from 'express';
import { Target } from '../models/Target.js';
import { logger } from '../utils/logger.js';
export const targetsRouter = Router();
// Get all targets
targetsRouter.get('/', async (req: Request, res: Response) => {
try {
const targets = await Target.find()
.sort({ updatedAt: -1 })
.lean();
// Add profile count
const targetsWithCount = targets.map(t => ({
...t,
id: t._id,
profile_count: t.profiles?.length || 0,
}));
res.json(targetsWithCount);
} catch (error) {
logger.error('Error fetching targets:', error);
res.status(500).json({ error: 'Failed to fetch targets' });
}
});
// Get single target with profiles
targetsRouter.get('/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const target = await Target.findById(id).lean();
if (!target) {
res.status(404).json({ error: 'Target not found' });
return;
}
res.json({
...target,
id: target._id,
profiles: target.profiles?.map(p => ({
...p,
id: p._id,
target_id: target._id,
profile_url: p.profileUrl,
profile_data: p.profileData ? JSON.stringify(p.profileData) : null,
last_scraped: p.lastScraped,
created_at: p.createdAt,
})) || [],
});
} catch (error) {
logger.error('Error fetching target:', error);
res.status(500).json({ error: 'Failed to fetch target' });
}
});
// Create target
targetsRouter.post('/', async (req: Request, res: Response) => {
try {
const { name, notes } = req.body;
if (!name) {
res.status(400).json({ error: 'Name is required' });
return;
}
const target = new Target({ name, notes, profiles: [] });
await target.save();
logger.info(`Created target: ${name} (${target._id})`);
res.status(201).json({
...target.toObject(),
id: target._id,
profile_count: 0,
});
} catch (error) {
logger.error('Error creating target:', error);
res.status(500).json({ error: 'Failed to create target' });
}
});
// Update target
targetsRouter.put('/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { name, notes } = req.body;
const target = await Target.findByIdAndUpdate(
id,
{ name, notes },
{ new: true }
).lean();
if (!target) {
res.status(404).json({ error: 'Target not found' });
return;
}
res.json({ ...target, id: target._id });
} catch (error) {
logger.error('Error updating target:', error);
res.status(500).json({ error: 'Failed to update target' });
}
});
// Delete target
targetsRouter.delete('/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const result = await Target.findByIdAndDelete(id);
if (!result) {
res.status(404).json({ error: 'Target not found' });
return;
}
logger.info(`Deleted target: ${id}`);
res.json({ success: true });
} catch (error) {
logger.error('Error deleting target:', error);
res.status(500).json({ error: 'Failed to delete target' });
}
});
// Add profile to target
targetsRouter.post('/:id/profiles', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { platform, username, profile_url } = req.body;
if (!platform) {
res.status(400).json({ error: 'Platform is required' });
return;
}
const target = await Target.findById(id);
if (!target) {
res.status(404).json({ error: 'Target not found' });
return;
}
const newProfile = {
platform,
username: username || undefined,
profileUrl: profile_url || undefined,
createdAt: new Date(),
};
target.profiles.push(newProfile as any);
await target.save();
const addedProfile = target.profiles[target.profiles.length - 1];
logger.info(`Added ${platform} profile to target ${id}`);
res.status(201).json({
id: addedProfile._id,
target_id: id,
platform: addedProfile.platform,
username: addedProfile.username,
profile_url: addedProfile.profileUrl,
created_at: addedProfile.createdAt,
});
} catch (error) {
logger.error('Error adding profile:', error);
res.status(500).json({ error: 'Failed to add profile' });
}
});
// Delete profile
targetsRouter.delete('/:id/profiles/:profileId', async (req: Request, res: Response) => {
try {
const { id, profileId } = req.params;
const target = await Target.findById(id);
if (!target) {
res.status(404).json({ error: 'Target not found' });
return;
}
const profileIndex = target.profiles.findIndex(
p => p._id.toString() === profileId
);
if (profileIndex === -1) {
res.status(404).json({ error: 'Profile not found' });
return;
}
target.profiles.splice(profileIndex, 1);
await target.save();
logger.info(`Deleted profile ${profileId} from target ${id}`);
res.json({ success: true });
} catch (error) {
logger.error('Error deleting profile:', error);
res.status(500).json({ error: 'Failed to delete profile' });
}
});
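
A hypothetical end-to-end sketch tying the routers together: create a target, attach a profile, then hand it to the scraper. The api helper, base URL, and token are assumptions, not part of this commit:

const BASE = 'http://localhost:3001';
const token = '...'; // hypothetical: JWT from POST /api/auth/login

async function api(method: string, path: string, body?: unknown) {
  const res = await fetch(`${BASE}${path}`, {
    method,
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
    body: body === undefined ? undefined : JSON.stringify(body),
  });
  return res.json();
}

const target = await api('POST', '/api/targets', { name: 'Example Person' });
const profile = await api('POST', `/api/targets/${target.id}/profiles`, {
  platform: 'x',
  username: 'example',
  profile_url: 'https://x.com/example',
});
await api('POST', '/api/scraper/start', {
  target_id: target.id,
  profile_id: profile.id,
  platform: 'x',
  profile_url: profile.profile_url,
});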

509
backend/src/scraper/manager.ts Normal file

@@ -0,0 +1,509 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { Server } from 'socket.io';
import { chromium, Browser, BrowserContext, Page } from 'playwright';
import mongoose from 'mongoose';
import { ScraperJob } from '../models/ScraperJob.js';
import { Session } from '../models/Session.js';
import { Target } from '../models/Target.js';
import { decrypt } from '../utils/encryption.js';
import { logger } from '../utils/logger.js';
// The page.evaluate callbacks below run in the browser context; these declarations
// keep the Node-side TypeScript compiler happy about the DOM globals they reference.
declare const document: any;
declare const window: any;
export interface ScraperJobConfig {
jobId: string;
platform: string;
profileUrl?: string;
targetId?: string;
profileId?: string;
}
interface ActiveJob {
config: ScraperJobConfig;
browser?: Browser;
context?: BrowserContext;
page?: Page;
abortController: AbortController;
}
export class ScraperManager {
private io: Server;
private activeJobs: Map<string, ActiveJob> = new Map();
private browser: Browser | null = null;
constructor(io: Server) {
this.io = io;
}
private async log(jobId: string, level: string, message: string): Promise<void> {
await ScraperJob.findByIdAndUpdate(jobId, {
$push: {
logs: {
level,
message,
timestamp: new Date(),
}
}
});
// Emit to socket
this.io.to(`scraper:${jobId}`).emit('scraper:log', {
jobId,
level,
message,
timestamp: new Date().toISOString(),
});
logger[level as 'info' | 'warn' | 'error'](`[Job ${jobId}] ${message}`);
}
private async updateJobStatus(
jobId: string,
status: string,
progress?: number,
result?: any,
error?: string
): Promise<void> {
const update: any = { status };
if (progress !== undefined) update.progress = progress;
if (result !== undefined) update.result = result;
if (error !== undefined) update.error = error;
if (status === 'running') update.startedAt = new Date();
if (status === 'completed' || status === 'failed' || status === 'cancelled') {
update.completedAt = new Date();
}
await ScraperJob.findByIdAndUpdate(jobId, update);
// Emit status update
this.io.to(`scraper:${jobId}`).emit('scraper:status', {
jobId,
status,
progress,
result: result ? result : undefined,
error,
});
// Also emit to general channel
this.io.emit('scraper:jobUpdate', { jobId, status, progress });
}
async startJob(config: ScraperJobConfig): Promise<void> {
const abortController = new AbortController();
const activeJob: ActiveJob = {
config,
abortController,
};
this.activeJobs.set(config.jobId, activeJob);
// Run asynchronously
this.runJob(config, abortController.signal).catch((error) => {
logger.error(`Job ${config.jobId} failed:`, error);
});
}
private async runJob(config: ScraperJobConfig, signal: AbortSignal): Promise<void> {
const { jobId, platform, profileUrl } = config;
try {
await this.updateJobStatus(jobId, 'running', 0);
await this.log(jobId, 'info', `Starting scrape for platform: ${platform}`);
// Load session from vault
const session = await this.loadSession(platform);
if (!session) {
throw new Error(`No session found for platform: ${platform}`);
}
await this.log(jobId, 'info', `Loaded session: ${session.sessionName}`);
await this.updateJobStatus(jobId, 'running', 10);
// Initialize browser
await this.log(jobId, 'info', 'Initializing browser...');
const browser = await this.getBrowser();
if (signal.aborted) {
throw new Error('Job cancelled');
}
// Create context with session
const context = await browser.newContext({
userAgent: session.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
viewport: { width: 1920, height: 1080 },
locale: 'en-US',
});
// Add cookies
if (session.cookies && Array.isArray(session.cookies)) {
await context.addCookies(session.cookies);
await this.log(jobId, 'info', `Loaded ${session.cookies.length} cookies`);
}
await this.updateJobStatus(jobId, 'running', 20);
const page = await context.newPage();
// Store references
const activeJob = this.activeJobs.get(jobId);
if (activeJob) {
activeJob.context = context;
activeJob.page = page;
}
if (signal.aborted) {
await context.close();
throw new Error('Job cancelled');
}
// Run platform-specific scraper
let result: any;
switch (platform.toLowerCase()) {
case 'x':
case 'twitter':
result = await this.scrapeTwitter(jobId, page, profileUrl, signal);
break;
case 'instagram':
result = await this.scrapeInstagram(jobId, page, profileUrl, signal);
break;
case 'linkedin':
result = await this.scrapeLinkedIn(jobId, page, profileUrl, signal);
break;
case 'facebook':
result = await this.scrapeFacebook(jobId, page, profileUrl, signal);
break;
default:
result = await this.scrapeGeneric(jobId, page, profileUrl || '', signal);
}
// Close context
await context.close();
// Save result to profile if profile_id provided
if (config.profileId && config.targetId && result) {
await Target.updateOne(
{ _id: new mongoose.Types.ObjectId(config.targetId), 'profiles._id': new mongoose.Types.ObjectId(config.profileId) },
{
$set: {
'profiles.$.profileData': result,
'profiles.$.lastScraped': new Date()
}
}
);
}
await this.updateJobStatus(jobId, 'completed', 100, result);
await this.log(jobId, 'info', 'Scrape completed successfully');
} catch (error: any) {
const errorMessage = error.message || 'Unknown error';
await this.log(jobId, 'error', `Scrape failed: ${errorMessage}`);
await this.updateJobStatus(jobId, 'failed', undefined, undefined, errorMessage);
} finally {
this.activeJobs.delete(jobId);
}
}
private async loadSession(platform: string): Promise<any> {
const session = await Session.findOne({ platform, status: 'active' }).lean();
if (!session) {
return null;
}
return {
...session,
cookies: JSON.parse(decrypt(session.cookiesEncrypted)),
localStorage: session.localStorageEncrypted
? JSON.parse(decrypt(session.localStorageEncrypted))
: null,
};
}
private async getBrowser(): Promise<Browser> {
if (!this.browser || !this.browser.isConnected()) {
this.browser = await chromium.launch({
headless: true,
args: [
'--disable-blink-features=AutomationControlled',
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-web-security',
'--disable-features=IsolateOrigins,site-per-process',
],
});
}
return this.browser;
}
// Platform-specific scrapers
private async scrapeTwitter(
jobId: string,
page: Page,
profileUrl: string | undefined,
signal: AbortSignal
): Promise<any> {
const url = profileUrl || 'https://x.com/home';
await this.log(jobId, 'info', `Navigating to: ${url}`);
await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
await this.updateJobStatus(jobId, 'running', 40);
if (signal.aborted) throw new Error('Job cancelled');
// Wait for content to load
await page.waitForTimeout(2000);
await this.log(jobId, 'info', 'Extracting profile data...');
await this.updateJobStatus(jobId, 'running', 60);
// Extract profile data
const profileData = await page.evaluate(() => {
const doc = document as any;
const win = window as any;
const data: any = {
url: win.location.href,
scraped_at: new Date().toISOString(),
};
// Try to extract profile info
const nameElement = doc.querySelector('[data-testid="UserName"]');
if (nameElement) {
data.display_name = nameElement.querySelector('span')?.textContent;
data.username = nameElement.querySelectorAll('span')[1]?.textContent;
}
const bioElement = doc.querySelector('[data-testid="UserDescription"]');
if (bioElement) {
data.bio = bioElement.textContent;
}
// Extract stats
const statsElements = doc.querySelectorAll('[href*="/following"], [href*="/followers"]');
statsElements.forEach((el: any) => {
const href = el.getAttribute('href');
const text = el.textContent;
if (href?.includes('following')) {
data.following = text;
} else if (href?.includes('followers')) {
data.followers = text;
}
});
// Get recent tweets
const tweets: any[] = [];
doc.querySelectorAll('[data-testid="tweet"]').forEach((tweet: any, i: number) => {
if (i < 10) { // Limit to 10 tweets
tweets.push({
text: tweet.querySelector('[data-testid="tweetText"]')?.textContent,
timestamp: tweet.querySelector('time')?.getAttribute('datetime'),
});
}
});
data.recent_tweets = tweets;
return data;
});
await this.updateJobStatus(jobId, 'running', 80);
await this.log(jobId, 'info', `Extracted profile: ${profileData.username || 'unknown'}`);
return profileData;
}
private async scrapeInstagram(
jobId: string,
page: Page,
profileUrl: string | undefined,
signal: AbortSignal
): Promise<any> {
const url = profileUrl || 'https://instagram.com';
await this.log(jobId, 'info', `Navigating to: ${url}`);
await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
await this.updateJobStatus(jobId, 'running', 40);
if (signal.aborted) throw new Error('Job cancelled');
await page.waitForTimeout(2000);
await this.log(jobId, 'info', 'Extracting profile data...');
await this.updateJobStatus(jobId, 'running', 60);
const profileData = await page.evaluate(() => {
const doc = document as any;
const win = window as any;
const data: any = {
url: win.location.href,
scraped_at: new Date().toISOString(),
};
// Extract from meta tags and visible elements
const ogTitle = doc.querySelector('meta[property="og:title"]');
if (ogTitle) {
data.title = ogTitle.getAttribute('content');
}
const ogDescription = doc.querySelector('meta[property="og:description"]');
if (ogDescription) {
data.description = ogDescription.getAttribute('content');
}
// Try to parse stats from description
const statsMatch = data.description?.match(/(\d+(?:,\d+)*(?:\.\d+)?[KMB]?)\s+Followers/i);
if (statsMatch) {
data.followers = statsMatch[1];
}
// Get profile picture
const profilePic = doc.querySelector('img[alt*="profile picture"]');
if (profilePic) {
data.profile_picture = profilePic.getAttribute('src');
}
return data;
});
await this.updateJobStatus(jobId, 'running', 80);
return profileData;
}
private async scrapeLinkedIn(
jobId: string,
page: Page,
profileUrl: string | undefined,
signal: AbortSignal
): Promise<any> {
const url = profileUrl || 'https://linkedin.com';
await this.log(jobId, 'info', `Navigating to: ${url}`);
await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
await this.updateJobStatus(jobId, 'running', 40);
if (signal.aborted) throw new Error('Job cancelled');
await page.waitForTimeout(2000);
await this.updateJobStatus(jobId, 'running', 60);
const profileData = await page.evaluate(() => {
const doc = document as any;
const win = window as any;
const data: any = {
url: win.location.href,
scraped_at: new Date().toISOString(),
};
// Extract profile info
const nameElement = doc.querySelector('h1');
if (nameElement) {
data.name = nameElement.textContent?.trim();
}
const headlineElement = doc.querySelector('.text-body-medium');
if (headlineElement) {
data.headline = headlineElement.textContent?.trim();
}
return data;
});
await this.updateJobStatus(jobId, 'running', 80);
return profileData;
}
private async scrapeFacebook(
jobId: string,
page: Page,
profileUrl: string | undefined,
signal: AbortSignal
): Promise<any> {
const url = profileUrl || 'https://facebook.com';
await this.log(jobId, 'info', `Navigating to: ${url}`);
await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
await this.updateJobStatus(jobId, 'running', 40);
if (signal.aborted) throw new Error('Job cancelled');
await page.waitForTimeout(2000);
await this.updateJobStatus(jobId, 'running', 60);
const profileData = await page.evaluate(() => {
const doc = document as any;
const win = window as any;
return {
url: win.location.href,
scraped_at: new Date().toISOString(),
title: doc.title,
};
});
await this.updateJobStatus(jobId, 'running', 80);
return profileData;
}
private async scrapeGeneric(
jobId: string,
page: Page,
url: string,
signal: AbortSignal
): Promise<any> {
await this.log(jobId, 'info', `Navigating to: ${url}`);
await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
await this.updateJobStatus(jobId, 'running', 50);
if (signal.aborted) throw new Error('Job cancelled');
const data = await page.evaluate(() => {
const doc = document as any;
const win = window as any;
return {
url: win.location.href,
title: doc.title,
scraped_at: new Date().toISOString(),
text_content: doc.body.innerText.substring(0, 5000),
};
});
await this.updateJobStatus(jobId, 'running', 80);
return data;
}
async cancelJob(jobId: string): Promise<void> {
const activeJob = this.activeJobs.get(jobId);
if (activeJob) {
activeJob.abortController.abort();
if (activeJob.context) {
await activeJob.context.close().catch(() => {});
}
await this.updateJobStatus(jobId, 'cancelled');
await this.log(jobId, 'info', 'Job cancelled by user');
this.activeJobs.delete(jobId);
}
}
async shutdown(): Promise<void> {
// Cancel all active jobs
for (const [jobId] of this.activeJobs) {
await this.cancelJob(jobId);
}
// Close browser
if (this.browser) {
await this.browser.close();
this.browser = null;
}
}
}
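
shutdown() is defined but never invoked in this commit. A sketch of hypothetical wiring in index.ts so the shared Chromium instance does not outlive the process:

process.on('SIGTERM', async () => {
  await scraperManager.shutdown(); // the instance created in index.ts
  process.exit(0);
});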

44
backend/src/utils/encryption.ts Normal file

@@ -0,0 +1,44 @@
import crypto from 'crypto';
const ALGORITHM = 'aes-256-gcm';
const IV_LENGTH = 16;
const AUTH_TAG_LENGTH = 16;
function getEncryptionKey(): Buffer {
const key = process.env.VAULT_ENCRYPTION_KEY;
if (!key || key.length !== 64) {
throw new Error('VAULT_ENCRYPTION_KEY must be a 64-character hex string');
}
return Buffer.from(key, 'hex');
}
export function encrypt(text: string): string {
const iv = crypto.randomBytes(IV_LENGTH);
const cipher = crypto.createCipheriv(ALGORITHM, getEncryptionKey(), iv);
let encrypted = cipher.update(text, 'utf8', 'hex');
encrypted += cipher.final('hex');
const authTag = cipher.getAuthTag();
// Combine IV + AuthTag + Encrypted data
return iv.toString('hex') + authTag.toString('hex') + encrypted;
}
export function decrypt(encryptedData: string): string {
const iv = Buffer.from(encryptedData.slice(0, IV_LENGTH * 2), 'hex');
const authTag = Buffer.from(encryptedData.slice(IV_LENGTH * 2, IV_LENGTH * 2 + AUTH_TAG_LENGTH * 2), 'hex');
const encrypted = encryptedData.slice(IV_LENGTH * 2 + AUTH_TAG_LENGTH * 2);
const decipher = crypto.createDecipheriv(ALGORITHM, getEncryptionKey(), iv);
decipher.setAuthTag(authTag);
let decrypted = decipher.update(encrypted, 'hex', 'utf8');
decrypted += decipher.final('utf8');
return decrypted;
}
// Note: unsalted SHA-256 is a fingerprint, not a password hash; if this ever
// guards real credentials, prefer bcrypt (bcryptjs is already a dependency).
export function hashPassword(password: string): string {
return crypto.createHash('sha256').update(password).digest('hex');
}
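
A round-trip sketch of the layout encrypt produces (as hex: 32 chars of IV, 32 chars of GCM auth tag, ciphertext after that), assuming VAULT_ENCRYPTION_KEY holds a 64-character hex string:

import { encrypt, decrypt } from './encryption.js';

const blob = encrypt(JSON.stringify([{ name: 'sid', value: 'abc' }]));
// blob.slice(0, 32) -> IV, blob.slice(32, 64) -> auth tag, blob.slice(64) -> ciphertext
const cookies = JSON.parse(decrypt(blob)); // back to [{ name: 'sid', value: 'abc' }]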

33
backend/src/utils/logger.ts Normal file

@@ -0,0 +1,33 @@
import winston from 'winston';
const logFormat = winston.format.combine(
winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
winston.format.errors({ stack: true }),
winston.format.printf(({ level, message, timestamp, stack }) => {
return `${timestamp} [${level.toUpperCase()}]: ${stack || message}`;
})
);
export const logger = winston.createLogger({
level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',
format: logFormat,
transports: [
new winston.transports.Console({
format: winston.format.combine(
winston.format.colorize(),
logFormat
),
}),
new winston.transports.File({
filename: 'logs/error.log',
level: 'error',
maxsize: 5242880, // 5MB
maxFiles: 5,
}),
new winston.transports.File({
filename: 'logs/combined.log',
maxsize: 5242880,
maxFiles: 5,
}),
],
});

20
backend/tsconfig.json Normal file

@@ -0,0 +1,20 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"lib": ["ES2022"],
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}