<!DOCTYPE html>
<html lang="pt-br">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Leitor de Arquivos HTML com Extração de Contatos e Datas</title>
</head>
<body>
<h2>Selecione um arquivo HTML para visualizar e extrair contatos e datas:</h2>
<input type="file" id="fileInput" accept=".html, .htm">
<button onclick="processFile()">Processar Arquivo</button>
<div id="output"></div>
<div id="contacts"></div>
<div id="dates"></div>
<div id="error"></div>
<script>
/**
 * Handles the "Processar Arquivo" button: reads the file picked in
 * #fileInput, renders its sanitized markup in #output and lists the e-mails,
 * phone numbers and dates found in the raw source.
 *
 * Takes no arguments and returns nothing; results go to #output, #contacts
 * and #dates, and validation or read failures are reported in #error.
 */
function processFile() {
  const fileInput = document.getElementById('fileInput');
  const file = fileInput.files[0];
  const output = document.getElementById('output');
  const contactsOutput = document.getElementById('contacts');
  const datesOutput = document.getElementById('dates');
  const errorOutput = document.getElementById('error');

  // Reset whatever a previous run left on the page.
  output.innerHTML = '';
  contactsOutput.innerHTML = '<h3>Contatos Encontrados:</h3>';
  datesOutput.innerHTML = '<h3>Datas Encontradas:</h3>';
  errorOutput.innerHTML = '';

  // Nothing was selected.
  if (!file) {
    errorOutput.textContent = "Por favor, selecione um arquivo para processar.";
    return;
  }

  // Compare the extension in lower case so names such as "PAGE.HTML" or
  // "index.Htm" are accepted as well.
  const lowerCaseName = file.name.toLowerCase();
  if (!lowerCaseName.endsWith('.html') && !lowerCaseName.endsWith('.htm')) {
    errorOutput.textContent = "Por favor, selecione um arquivo HTML válido.";
    return;
  }

  const reader = new FileReader();

  // When the text is available, show it and run the extractors on the raw
  // source (not on the sanitized copy).
  reader.onload = function(event) {
    const htmlContent = event.target.result;
    const htmlDiv = document.createElement('div');
    htmlDiv.innerHTML = sanitizeHTML(htmlContent);
    output.appendChild(htmlDiv);
    extractContacts(htmlContent);
    extractDates(htmlContent);
  };

  // Report read failures to the user.
  reader.onerror = function() {
    errorOutput.textContent = `Erro ao ler o arquivo: ${file.name}`;
  };

  reader.readAsText(file);
}
/**
 * Returns a copy of the given markup with active content removed, so it can
 * be assigned to innerHTML without running code that came from the file.
 *
 * The markup is parsed inside a <template>, whose content is inert: nothing
 * runs or loads while it is being cleaned. Parsing into a plain detached
 * <div> would already fire handlers such as <img onerror="...">.
 *
 * Removed: script, iframe, object, embed, base and meta elements, every
 * on* event-handler attribute, and URL attributes that use the javascript:
 * scheme.
 *
 * NOTE(review): this is a best-effort filter, not a complete sanitizer. For
 * hostile input prefer a vetted sanitizer or a sandboxed iframe.
 *
 * @param {string} htmlContent Raw HTML source.
 * @returns {string} Serialized markup without the removed nodes/attributes.
 */
function sanitizeHTML(htmlContent) {
  const template = document.createElement('template');
  template.innerHTML = htmlContent;
  const fragment = template.content;

  // Elements that can run script or take over the host page.
  fragment
    .querySelectorAll('script, iframe, object, embed, base, meta')
    .forEach(node => node.remove());

  const urlAttributes = ['href', 'src', 'action', 'formaction', 'xlink:href'];
  fragment.querySelectorAll('*').forEach(element => {
    // Copy the attribute list first: it is live and shrinks on removal.
    Array.from(element.attributes).forEach(attribute => {
      const name = attribute.name.toLowerCase();
      // Whitespace and control characters are ignored inside a URL scheme,
      // so strip them before looking for "javascript:".
      const compactValue = attribute.value.replace(/[\u0000-\u0020]/g, '');
      const isEventHandler = name.startsWith('on');
      const isScriptUrl =
        urlAttributes.indexOf(name) !== -1 && /^javascript:/i.test(compactValue);
      if (isEventHandler || isScriptUrl) {
        element.removeAttribute(attribute.name);
      }
    });
  });

  return template.innerHTML;
}
/**
 * Lists the e-mail addresses and phone numbers found in the given text
 * inside #contacts, appending one <div> per category that has matches.
 *
 * Each distinct value is listed once, in order of first appearance.
 *
 * @param {string} htmlContent Text to scan (the raw HTML source).
 */
function extractContacts(htmlContent) {
  const contactsOutput = document.getElementById('contacts');
  const emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
  const phoneRegex = /(\+?\d{1,3})?[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{4,5}[-.\s]?\d{4}/g;

  // An address commonly appears twice in HTML source (in a mailto: href and
  // in the link text), so collapse repeats before listing.
  const emails = Array.from(new Set(htmlContent.match(emailRegex) || []));
  if (emails.length > 0) {
    const emailDiv = document.createElement('div');
    emailDiv.innerHTML = `<strong>E-mails encontrados:</strong> ${emails.join(', ')}`;
    contactsOutput.appendChild(emailDiv);
  }

  // phoneRegex may begin on its optional separator, so a match can carry the
  // blank, dot or hyphen that preceded the number; strip it before listing.
  const phones = Array.from(new Set(
    (htmlContent.match(phoneRegex) || []).map(phone => phone.replace(/^[-.\s]+/, ''))
  ));
  if (phones.length > 0) {
    const phoneDiv = document.createElement('div');
    phoneDiv.innerHTML = `<strong>Telefones encontrados:</strong> ${phones.join(', ')}`;
    contactsOutput.appendChild(phoneDiv);
  }
}
/**
 * Lists date-like strings found in the given text inside #dates.
 *
 * A match is one or two digits, a separator ("/", "." or "-"), one or two
 * digits, the same separator again, then two to four digits. When at least
 * one match exists a <div> with the comma-separated list is appended;
 * otherwise the page is left untouched.
 *
 * @param {string} htmlContent Text to scan.
 */
function extractDates(htmlContent) {
  const found = htmlContent.match(/\b\d{1,2}([\/.-])\d{1,2}\1\d{2,4}\b/g);
  if (found === null) {
    return;
  }
  const listing = document.createElement('div');
  listing.innerHTML = `<strong>Datas encontradas:</strong> ${found.join(', ')}`;
  document.getElementById('dates').appendChild(listing);
}
</script>
</body>
</html>
<!DOCTYPE html>
<html lang="pt-br">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Leitor de Arquivos HTML, Contatos, Datas e PDF</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
</head>
<body>
<h2>Selecione arquivos HTML ou PDF para visualizar e extrair contatos e datas:</h2>
<input type="file" id="fileInput" accept=".html, .htm, .pdf" multiple>
<button onclick="processFiles()">Processar Arquivos</button>
<div id="output"></div>
<div id="contacts"></div>
<div id="dates"></div>
<div id="error"></div>
<script>
/**
 * Handles the "Processar Arquivos" button: clears the result areas and
 * dispatches every file chosen in #fileInput to processHTML or processPDF
 * according to its extension.
 *
 * Every problem found (no selection, unsupported file, PDF failure) is
 * added to #error on its own line.
 */
function processFiles() {
  const files = document.getElementById('fileInput').files;
  const output = document.getElementById('output');
  const contactsOutput = document.getElementById('contacts');
  const datesOutput = document.getElementById('dates');
  const errorOutput = document.getElementById('error');

  output.innerHTML = '';
  contactsOutput.innerHTML = '<h3>Contatos Encontrados:</h3>';
  datesOutput.innerHTML = '<h3>Datas Encontradas:</h3>';
  errorOutput.innerHTML = '';

  // Append instead of assigning, so an earlier message is not overwritten
  // by a later one when several files fail.
  const reportError = message => {
    const line = document.createElement('div');
    line.textContent = message;
    errorOutput.appendChild(line);
  };

  if (files.length === 0) {
    reportError("Selecione pelo menos um arquivo.");
    return;
  }

  Array.from(files).forEach(file => {
    // Lower-case the name so "REPORT.PDF" or "Index.HTML" are recognised.
    const lowerCaseName = file.name.toLowerCase();
    if (lowerCaseName.endsWith('.html') || lowerCaseName.endsWith('.htm')) {
      processHTML(file);
    } else if (lowerCaseName.endsWith('.pdf')) {
      // processPDF is async; show a rejected promise to the user instead of
      // leaving it as an unhandled rejection.
      processPDF(file).catch(error => {
        reportError(`Erro ao processar ${file.name}: ${error && error.message ? error.message : error}`);
      });
    } else {
      reportError("Arquivo não suportado: " + file.name);
    }
  });
}
/**
 * Reads one HTML file as text, appends its sanitized markup to #output and
 * runs the contact and date extractors on the raw source.
 *
 * @param {File} file File selected by the user.
 */
function processHTML(file) {
  const reader = new FileReader();

  reader.onload = function(event) {
    const htmlContent = event.target.result;
    const htmlDiv = document.createElement('div');
    htmlDiv.innerHTML = sanitizeHTML(htmlContent);
    document.getElementById('output').appendChild(htmlDiv);
    extractContacts(htmlContent);
    extractDates(htmlContent);
  };

  // Without this handler a failed read would give the user no feedback.
  // The message is appended so errors from other files are kept.
  reader.onerror = function() {
    const line = document.createElement('div');
    line.textContent = `Erro ao ler o arquivo: ${file.name}`;
    document.getElementById('error').appendChild(line);
  };

  reader.readAsText(file);
}
/**
 * Returns a copy of the given markup with active content removed, so it can
 * be assigned to innerHTML without running code that came from the file.
 *
 * The markup is parsed inside a <template>, whose content is inert: nothing
 * runs or loads while it is being cleaned. Parsing into a plain detached
 * <div> would already fire handlers such as <img onerror="...">.
 *
 * Removed: script, iframe, object, embed, base and meta elements, every
 * on* event-handler attribute, and URL attributes that use the javascript:
 * scheme.
 *
 * NOTE(review): this is a best-effort filter, not a complete sanitizer. For
 * hostile input prefer a vetted sanitizer or a sandboxed iframe.
 *
 * @param {string} htmlContent Raw HTML source.
 * @returns {string} Serialized markup without the removed nodes/attributes.
 */
function sanitizeHTML(htmlContent) {
  const template = document.createElement('template');
  template.innerHTML = htmlContent;
  const fragment = template.content;

  // Elements that can run script or take over the host page.
  fragment
    .querySelectorAll('script, iframe, object, embed, base, meta')
    .forEach(node => node.remove());

  const urlAttributes = ['href', 'src', 'action', 'formaction', 'xlink:href'];
  fragment.querySelectorAll('*').forEach(element => {
    // Copy the attribute list first: it is live and shrinks on removal.
    Array.from(element.attributes).forEach(attribute => {
      const name = attribute.name.toLowerCase();
      // Whitespace and control characters are ignored inside a URL scheme,
      // so strip them before looking for "javascript:".
      const compactValue = attribute.value.replace(/[\u0000-\u0020]/g, '');
      const isEventHandler = name.startsWith('on');
      const isScriptUrl =
        urlAttributes.indexOf(name) !== -1 && /^javascript:/i.test(compactValue);
      if (isEventHandler || isScriptUrl) {
        element.removeAttribute(attribute.name);
      }
    });
  });

  return template.innerHTML;
}
/**
 * Appends the e-mail addresses and phone numbers found in the given text to
 * #contacts, one line per category that has matches.
 *
 * Each distinct value is listed once, in order of first appearance.
 *
 * @param {string} content Text to scan (HTML source or text taken from a PDF).
 */
function extractContacts(content) {
  const contactsOutput = document.getElementById('contacts');
  const emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
  const phoneRegex = /(\+?\d{1,3})?[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{4,5}[-.\s]?\d{4}/g;

  // An address commonly appears twice in HTML source (in a mailto: href and
  // in the link text), so collapse repeats before listing.
  const emails = Array.from(new Set(content.match(emailRegex) || []));
  if (emails.length > 0) {
    contactsOutput.innerHTML += `<strong>E-mails encontrados:</strong> ${emails.join(', ')}<br>`;
  }

  // phoneRegex may begin on its optional separator, so a match can carry the
  // blank, dot or hyphen that preceded the number; strip it before listing.
  const phones = Array.from(new Set(
    (content.match(phoneRegex) || []).map(phone => phone.replace(/^[-.\s]+/, ''))
  ));
  if (phones.length > 0) {
    contactsOutput.innerHTML += `<strong>Telefones encontrados:</strong> ${phones.join(', ')}<br>`;
  }
}
/**
 * Appends the date-like strings found in the given text to #dates.
 *
 * A match is one or two digits, a separator ("/", "." or "-"), one or two
 * digits, the same separator again, then two to four digits. Nothing is
 * written when the text has no match.
 *
 * @param {string} content Text to scan.
 */
function extractDates(content) {
  const matches = content.match(/\b\d{1,2}([\/.-])\d{1,2}\1\d{2,4}\b/g);
  if (!matches) {
    return;
  }
  const panel = document.getElementById('dates');
  panel.innerHTML = panel.innerHTML + `<strong>Datas encontradas:</strong> ${matches.join(', ')}<br>`;
}
/**
 * Extracts the text of every page of a PDF with pdf.js, shows it in #output
 * and runs the contact and date extractors on it.
 *
 * The file name and the extracted text are inserted as text nodes, never as
 * markup, so characters such as "<" in a PDF or in its name cannot inject
 * HTML into the page.
 *
 * @param {File} file PDF file selected by the user.
 * @returns {Promise<void>} Rejects when the file cannot be read or parsed.
 */
async function processPDF(file) {
  const pdfData = await file.arrayBuffer();
  const pdf = await pdfjsLib.getDocument({ data: pdfData }).promise;
  const output = document.getElementById('output');

  // One line of text per page; pdf.js page numbers start at 1.
  let textContent = '';
  for (let i = 1; i <= pdf.numPages; i++) {
    const page = await pdf.getPage(i);
    const text = await page.getTextContent();
    textContent += text.items.map(item => item.str).join(' ') + '\n';
  }

  const pdfTextDiv = document.createElement('div');
  const heading = document.createElement('strong');
  heading.textContent = `Conteúdo do PDF (${file.name}):`;
  pdfTextDiv.appendChild(heading);
  pdfTextDiv.appendChild(document.createElement('br'));

  // Same layout as before (a <br> wherever the text had "\n"), but built
  // from text nodes instead of an HTML string.
  textContent.split('\n').forEach((line, index) => {
    if (index > 0) {
      pdfTextDiv.appendChild(document.createElement('br'));
    }
    pdfTextDiv.appendChild(document.createTextNode(line));
  });
  output.appendChild(pdfTextDiv);

  // Extract contacts and dates from the PDF text.
  extractContacts(textContent);
  extractDates(textContent);
}
</script>
</body>
</html>
<!DOCTYPE html>
<html lang="pt-BR">
<head>
<meta charset="utf-8">
<title>Raspar Login</title>
</head>
<body>
<h1>Raspar Login</h1>
<input type="file" id="fileInput" accept=".html" multiple />
<div id="logins"></div>
<script>
/**
 * Appends to #logins either the list of login candidates found in the given
 * text or a notice that none was found.
 *
 * A candidate is an e-mail address or any run of three or more characters
 * from the set letters, digits, ".", "_", "%", "+", "-" that sits between
 * word boundaries.
 *
 * @param {string} content Text to scan.
 */
function extractLogins(content) {
  const candidates = content.match(
    /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b|\b[a-zA-Z0-9._%+-]{3,}\b/g
  );
  const line = candidates === null
    ? `<strong>Nenhum login encontrado.</strong><br>`
    : `<strong>Logins encontrados:</strong> ${candidates.join(', ')}<br>`;
  const panel = document.getElementById('logins');
  panel.innerHTML += line;
}
// Reads every file chosen in #fileInput and lists the logins found in each.
// File names are written with textContent, never interpolated into markup,
// so a name containing "<" or "&" cannot inject HTML into the page.
document.getElementById('fileInput').addEventListener('change', function(event) {
  const files = event.target.files;
  const loginsOutput = document.getElementById('logins');
  loginsOutput.innerHTML = ""; // Clear the previous results

  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    // File.type is an empty string when the browser cannot determine the
    // type; fall back to the extension in that case only.
    const isHtml = file.type === "text/html" ||
      (file.type === "" && /\.html?$/i.test(file.name));

    if (isHtml) {
      const reader = new FileReader();
      reader.onload = function(e) {
        const heading = document.createElement('h3');
        heading.textContent = `Logins no arquivo: ${file.name}`;
        loginsOutput.appendChild(heading);
        extractLogins(e.target.result);
      };
      reader.readAsText(file);
    } else {
      const notice = document.createElement('strong');
      notice.textContent = `Arquivo ${file.name} não é HTML.`;
      loginsOutput.appendChild(notice);
      loginsOutput.appendChild(document.createElement('br'));
    }
  }
});
</script>
</body>
</html>
<!DOCTYPE html>
<html lang="pt-BR">
<head>
<meta charset="utf-8">
<title>Raspar Dados</title>
</head>
<body>
<h1>Raspar Dados (Logins, E-mails e Telefones)</h1>
<input type="file" id="fileInput" accept=".html" multiple />
<div id="results"></div>
<script>
/**
 * Appends three lines to #results: the e-mails, the phone numbers and the
 * login candidates found in the given text. A category without matches
 * shows "Nenhum".
 *
 * @param {string} content Text to scan.
 */
function extractData(content) {
  // Comma-separated matches of a pattern, or "Nenhum" when there are none.
  const describe = pattern => {
    const found = content.match(pattern);
    return found === null ? 'Nenhum' : found.join(', ');
  };

  // E-mail addresses.
  const emailText = describe(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b/g);
  // Phone numbers such as (123) 456-7890 or 123-456-7890.
  const phoneText = describe(/\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{2,3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}\b/g);
  // Simple logins: 3 to 15 characters from the allowed set.
  const loginText = describe(/\b[a-zA-Z0-9._%+-]{3,15}\b/g);

  const panel = document.getElementById('results');
  panel.innerHTML += `<strong>E-mails encontrados:</strong> ${emailText}<br>`;
  panel.innerHTML += `<strong>Telefones encontrados:</strong> ${phoneText}<br>`;
  panel.innerHTML += `<strong>Logins encontrados:</strong> ${loginText}<br><br>`;
}
// Reads every file chosen in #fileInput and lists the data found in each.
// File names are written with textContent, never interpolated into markup,
// so a name containing "<" or "&" cannot inject HTML into the page.
document.getElementById('fileInput').addEventListener('change', function(event) {
  const files = event.target.files;
  const resultsOutput = document.getElementById('results');
  resultsOutput.innerHTML = ""; // Clear the previous results

  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    // File.type is an empty string when the browser cannot determine the
    // type; fall back to the extension in that case only.
    const isHtml = file.type === "text/html" ||
      (file.type === "" && /\.html?$/i.test(file.name));

    if (isHtml) {
      const reader = new FileReader();
      reader.onload = function(e) {
        const heading = document.createElement('h3');
        heading.textContent = `Dados do arquivo: ${file.name}`;
        resultsOutput.appendChild(heading);
        extractData(e.target.result);
      };
      reader.readAsText(file);
    } else {
      const notice = document.createElement('strong');
      notice.textContent = `Arquivo ${file.name} não é HTML.`;
      resultsOutput.appendChild(notice);
      resultsOutput.appendChild(document.createElement('br'));
    }
  }
});
</script>
</body>
</html>
<!DOCTYPE html>
<html lang="pt-br">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Extrator de Contatos: E-mails, Telefones e Logins</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
<style>
body { font-family: Arial, sans-serif; line-height: 1.6; }
h2 { color: #333; }
.output-section { margin-top: 20px; }
.output-list { padding: 10px; background-color: #f9f9f9; border: 1px solid #ddd; }
.output-list strong { color: #007bff; }
</style>
</head>
<body>
<h2>Selecione arquivos HTML ou PDF para extrair contatos:</h2>
<input type="file" id="fileInput" accept=".html, .htm, .pdf" multiple>
<button onclick="extractContacts()">Extrair Contatos</button>
<div class="output-section">
<div id="outputEmails" class="output-list"><strong>E-mails Encontrados:</strong> Nenhum encontrado.</div>
<div id="outputPhones" class="output-list"><strong>Telefones Encontrados:</strong> Nenhum encontrado.</div>
<div id="outputLogins" class="output-list"><strong>Logins Encontrados:</strong> Nenhum encontrado.</div>
<div id="error"></div>
</div>
<script>
/**
 * Handles the "Extrair Contatos" button: scans every selected HTML or PDF
 * file and shows the distinct e-mails, phone numbers and login candidates
 * found across all of them.
 *
 * Problems (no selection, unsupported file, processing failure) are listed
 * in #error, one per line.
 *
 * @returns {Promise<void>}
 */
async function extractContacts() {
  const files = document.getElementById('fileInput').files;
  const errorOutput = document.getElementById('error');
  if (files.length === 0) {
    errorOutput.textContent = "Selecione pelo menos um arquivo.";
    return;
  }

  // Reset output
  document.getElementById('outputEmails').innerHTML = "<strong>E-mails Encontrados:</strong> ";
  document.getElementById('outputPhones').innerHTML = "<strong>Telefones Encontrados:</strong> ";
  document.getElementById('outputLogins').innerHTML = "<strong>Logins Encontrados:</strong> ";
  errorOutput.innerHTML = "";

  // One element per message: line breaks inside a text node are collapsed
  // when rendered, so "\n"-separated messages would run together.
  const reportError = message => {
    const line = document.createElement('div');
    line.textContent = message;
    errorOutput.appendChild(line);
  };

  // Sets keep each value once across all files.
  const emails = new Set(), phones = new Set(), logins = new Set();

  for (const file of files) {
    try {
      // Lower-case the name so "REPORT.PDF" or "Index.HTML" are recognised.
      const lowerCaseName = file.name.toLowerCase();
      if (lowerCaseName.endsWith('.html') || lowerCaseName.endsWith('.htm')) {
        const content = await file.text();
        extractFromText(content, emails, phones, logins);
      } else if (lowerCaseName.endsWith('.pdf')) {
        const pdfContent = await extractTextFromPDF(file);
        extractFromText(pdfContent, emails, phones, logins);
      } else {
        reportError(`Arquivo não suportado: ${file.name}`);
      }
    } catch (error) {
      // Keep going with the remaining files after a failure.
      reportError(`Erro ao processar ${file.name}: ${error.message}`);
    }
  }

  displayResults(emails, 'outputEmails');
  displayResults(phones, 'outputPhones');
  displayResults(logins, 'outputLogins');
}
/**
 * Scans the given text and adds what it finds to the three result sets.
 *
 * @param {string} content Text to scan.
 * @param {Set<string>} emails Receives e-mail addresses.
 * @param {Set<string>} phones Receives phone numbers, without any separator
 *     that preceded them in the text.
 * @param {Set<string>} logins Receives login candidates: runs of 3 to 20
 *     allowed characters that are not part of an e-mail address.
 */
function extractFromText(content, emails, phones, logins) {
  const emailRegex = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(?:\.[a-zA-Z]{2,})?\b/g;
  const phoneRegex = /(\+?\d{1,3})?[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{4,5}[-.\s]?\d{4}/g;
  const loginRegex = /\b[a-zA-Z0-9._%+-]{3,20}\b/g;

  (content.match(emailRegex) || []).forEach(email => emails.add(email));

  // phoneRegex may begin on its optional separator, so a match can carry the
  // blank, dot or hyphen that preceded the number; strip it before storing.
  (content.match(phoneRegex) || []).forEach(phone => {
    phones.add(phone.replace(/^[-.\s]+/, ''));
  });

  // A login token can never contain "@", so comparing tokens with the stored
  // e-mails filters nothing: the pieces of each address ("ana", "site.com")
  // would still be reported as logins. Blank the addresses out of the text
  // before looking for logins instead.
  const withoutEmails = content.replace(emailRegex, ' ');
  (withoutEmails.match(loginRegex) || []).forEach(login => logins.add(login));
}
/**
 * Reads a PDF with pdf.js and returns the text of all its pages as a single
 * string. The text items of a page are joined with spaces, and every page
 * is followed by one space.
 *
 * @param {File} file PDF file selected by the user.
 * @returns {Promise<string>} Concatenated text of every page.
 */
async function extractTextFromPDF(file) {
  const buffer = await file.arrayBuffer();
  const pdf = await pdfjsLib.getDocument(buffer).promise;

  const pageTexts = [];
  let pageNum = 1; // pages are requested starting at 1
  while (pageNum <= pdf.numPages) {
    const page = await pdf.getPage(pageNum);
    const { items } = await page.getTextContent();
    pageTexts.push(items.map(item => item.str).join(' ') + ' ');
    pageNum += 1;
  }
  return pageTexts.join('');
}
/**
 * Appends the collected values to the element with the given id: a
 * comma-separated list, or "Nenhum encontrado." when the set is empty.
 *
 * @param {Set<string>} results Values to show.
 * @param {string} elementId Id of the element that receives the text.
 */
function displayResults(results, elementId) {
  let summary = "Nenhum encontrado.";
  if (results.size > 0) {
    summary = Array.from(results).join(', ');
  }
  const target = document.getElementById(elementId);
  target.innerHTML += summary;
}
</script>
</body>
</html>
<!-- MELHOR RASPA RASPA SITES HTML -->
<!DOCTYPE html>
<html lang="pt-br">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Leitor de Arquivos HTML com Extração</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
h2 {
color: #444;
}
#output, #contacts, #dates, #error {
margin-top: 20px;
padding: 10px;
border: 1px solid #ddd;
border-radius: 5px;
background-color: #f9f9f9;
}
#error {
color: red;
}
button {
margin-top: 10px;
padding: 10px 20px;
border: none;
border-radius: 5px;
background-color: #007bff;
color: white;
cursor: pointer;
}
button:hover {
background-color: #0056b3;
}
</style>
</head>
<body>
<h2>Selecione um arquivo HTML para visualizar e extrair informações:</h2>
<input type="file" id="fileInput" accept=".html, .htm">
<button onclick="processFile()">Processar Arquivo</button>
<button onclick="clearOutput()">Limpar Resultados</button>
<div id="output"></div>
<div id="contacts"></div>
<div id="dates"></div>
<div id="error"></div>
<script>
/**
 * Handles the "Processar Arquivo" button: reads the file picked in
 * #fileInput, shows its sanitized content in #output and lists the e-mails,
 * phone numbers and dates found in the raw source.
 *
 * Takes no arguments and returns nothing; missing selection, read failures
 * and processing failures are reported in #error.
 */
function processFile() {
  const fileInput = document.getElementById('fileInput');
  const file = fileInput.files[0];
  const output = document.getElementById('output');
  const contactsOutput = document.getElementById('contacts');
  const datesOutput = document.getElementById('dates');
  const errorOutput = document.getElementById('error');

  // Clear previous output.
  output.innerHTML = '';
  contactsOutput.innerHTML = '<h3>Contatos Encontrados:</h3>';
  datesOutput.innerHTML = '<h3>Datas Encontradas:</h3>';
  errorOutput.innerHTML = '';

  // Nothing was selected.
  if (!file) {
    errorOutput.textContent = 'Por favor, selecione um arquivo HTML válido.';
    return;
  }

  const reader = new FileReader();

  // Runs once the file text is available.
  reader.onload = function (event) {
    const htmlContent = event.target.result;
    try {
      // Show the file content on the page.
      output.innerHTML = `<h3>Conteúdo do Arquivo:</h3><pre>${sanitizeHTML(htmlContent)}</pre>`;

      // The extractors work on the raw source, not on the sanitized copy.
      const emails = extractEmails(htmlContent);
      const phones = extractPhones(htmlContent);
      const dates = extractDates(htmlContent);

      if (emails.length > 0) {
        contactsOutput.innerHTML += `<p><strong>E-mails:</strong> ${emails.join(', ')}</p>`;
      }
      if (phones.length > 0) {
        contactsOutput.innerHTML += `<p><strong>Telefones:</strong> ${phones.join(', ')}</p>`;
      }
      if (dates.length > 0) {
        datesOutput.innerHTML += `<p><strong>Datas:</strong> ${dates.join(', ')}</p>`;
      }
    } catch (error) {
      console.error("Erro durante o processamento:", error);
      errorOutput.textContent = "Erro ao processar o arquivo.";
    }
  };

  // Without this handler a failed read would leave the page showing only
  // the empty headings, with no explanation.
  reader.onerror = function () {
    errorOutput.textContent = `Erro ao ler o arquivo: ${file.name}`;
  };

  reader.readAsText(file);
}
/**
 * Returns a copy of the given markup with active content removed, so it can
 * be assigned to innerHTML without running code that came from the file.
 *
 * The markup is parsed inside a <template>, whose content is inert: nothing
 * runs or loads while it is being cleaned. Parsing into a plain detached
 * <div> would already fire handlers such as <img onerror="...">.
 *
 * Removed: script, iframe, object, embed, base and meta elements, every
 * on* event-handler attribute, and URL attributes that use the javascript:
 * scheme.
 *
 * NOTE(review): this is a best-effort filter, not a complete sanitizer. For
 * hostile input prefer a vetted sanitizer or a sandboxed iframe.
 *
 * @param {string} htmlContent Raw HTML source.
 * @returns {string} Serialized markup without the removed nodes/attributes.
 */
function sanitizeHTML(htmlContent) {
  const template = document.createElement('template');
  template.innerHTML = htmlContent;
  const fragment = template.content;

  // Elements that can run script or take over the host page.
  fragment
    .querySelectorAll('script, iframe, object, embed, base, meta')
    .forEach(node => node.remove());

  const urlAttributes = ['href', 'src', 'action', 'formaction', 'xlink:href'];
  fragment.querySelectorAll('*').forEach(element => {
    // Copy the attribute list first: it is live and shrinks on removal.
    Array.from(element.attributes).forEach(attribute => {
      const name = attribute.name.toLowerCase();
      // Whitespace and control characters are ignored inside a URL scheme,
      // so strip them before looking for "javascript:".
      const compactValue = attribute.value.replace(/[\u0000-\u0020]/g, '');
      const isEventHandler = name.startsWith('on');
      const isScriptUrl =
        urlAttributes.indexOf(name) !== -1 && /^javascript:/i.test(compactValue);
      if (isEventHandler || isScriptUrl) {
        element.removeAttribute(attribute.name);
      }
    });
  });

  return template.innerHTML;
}
/**
 * Returns every e-mail address found in the given text, in order of
 * appearance (repeats included), or an empty array when there is none.
 *
 * @param {string} htmlContent Text to scan.
 * @returns {string[]} Matched addresses.
 */
function extractEmails(htmlContent) {
  const found = htmlContent.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g);
  if (found === null) {
    return [];
  }
  return found;
}
/**
 * Returns every phone number found in the given text, in order of
 * appearance, or an empty array when there is none.
 *
 * The pattern may begin on its optional separator, so a raw match can carry
 * the blank, dot or hyphen that preceded the number (" 11 91234-5678"); that
 * leading separator is stripped from each result.
 *
 * @param {string} htmlContent Text to scan.
 * @returns {string[]} Matched phone numbers without a leading separator.
 */
function extractPhones(htmlContent) {
  const phoneRegex = /(\+?\d{1,3})?[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{4,5}[-.\s]?\d{4}/g;
  const matches = htmlContent.match(phoneRegex) || [];
  return matches.map(phone => phone.replace(/^[-.\s]+/, ''));
}
/**
 * Returns every date-like string found in the given text, in order of
 * appearance, or an empty array when there is none.
 *
 * A match is one or two digits, a separator ("/", "." or "-"), one or two
 * digits, the same separator again, then two to four digits.
 *
 * @param {string} htmlContent Text to scan.
 * @returns {string[]} Matched date strings.
 */
function extractDates(htmlContent) {
  const found = htmlContent.match(/\b\d{1,2}([\/.-])\d{1,2}\1\d{2,4}\b/g);
  return found === null ? [] : found;
}
/**
 * Handles the "Limpar Resultados" button: empties the #output, #contacts,
 * #dates and #error areas.
 */
function clearOutput() {
  ['output', 'contacts', 'dates', 'error'].forEach(id => {
    document.getElementById(id).innerHTML = '';
  });
}
</script>
</body>
</html>