Peachy Text Extractor

The free online OCR and text extraction tool that pulls text from images, scanned PDFs, documents, videos, and audio files — all privately in your browser. Use optical character recognition to digitize printed text, transcribe speech to text from video and audio recordings, or extract content from any document format. Export your results to TXT, PDF, DOCX, HTML, or Markdown in seconds. No uploads to external servers, no account needed — perfect for students, researchers, and professionals who need fast, accurate text extraction without installing software.

Extraction Options

Select a file to see extraction details.

  • Images: OCR text recognition
  • Documents: direct text parsing
  • Audio: speech-to-text transcription
  • Video: audio track transcription
🔍
Drop a file here or click to browse
Supports images, documents, PDFs, audio & video files
📄

Export extracted text as:

Extracting...

Extraction Complete

⬇ Download

Supported Formats

🎨 Images (OCR)

PNG, JPG/JPEG, JFIF, WEBP, GIF, BMP, TIFF, SVG, HEIC/HEIF, PSD — extract text from photos, screenshots, scanned documents, and graphics using optical character recognition.

📄 Documents

PDF, DOCX, RTF, TXT, CSV, HTML, Markdown (.md), JSON, XML, YAML, EML, MSG — parse and extract text content directly from document files.

📊 Spreadsheets & Presentations

XLSX, XLS, PPTX, PPT — extract text and data from spreadsheets and presentation slides.

🎵 Audio (Speech-to-Text)

MP3, WAV, OGG, WEBM, FLAC, AAC, M4A, WMA, AIFF — transcribe speech using OpenAI Whisper AI running locally in your browser. No microphone needed.

🎬 Video (Speech-to-Text)

MP4, WEBM, AVI, MOV, MKV, FLV — extract the audio track and transcribe it with Whisper AI. The file data is processed directly.

📤 Export Formats

TXT, PDF, DOCX, RTF, HTML, Markdown (.md), CSV, JSON, XML, YAML — export extracted text in any of these document formats.

🔒 Your files never leave your device. All text extraction happens right in your browser — nothing is uploaded to any server.

Should we build a Peachy Dietary Planner app?

Your feedback helps us decide what to build next

\n'; return new Blob([html], { type: 'text/html' }); } function formatAsRTF(text) { const escaped = text.replace(/\\/g, '\\\\').replace(/\{/g, '\\{').replace(/\}/g, '\\}'); const lines = escaped.split('\n'); let rtf = '{\\rtf1\\ansi\\deff0{\\fonttbl{\\f0 Calibri;}}\n\\f0\\fs24\n'; lines.forEach(line => { rtf += line + '\\par\n'; }); rtf += '}'; return new Blob([rtf], { type: 'application/rtf' }); } function formatAsPDF(text) { const lines = text.split('\n'); const pageLines = []; const linesPerPage = 50; for (let i = 0; i < lines.length; i += linesPerPage) { pageLines.push(lines.slice(i, i + linesPerPage)); } if (pageLines.length === 0) pageLines.push(['']); // Build minimal PDF let pdf = '%PDF-1.4\n'; const objects = []; let objNum = 1; // Catalog objects.push(objNum + ' 0 obj\n<< /Type /Catalog /Pages ' + (objNum+1) + ' 0 R >>\nendobj\n'); objNum++; // Pages const pagesObj = objNum; const pageRefs = []; objNum++; // Font const fontObj = objNum; objects.push(objNum + ' 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\nendobj\n'); objNum++; // Create each page for (let p = 0; p < pageLines.length; p++) { const contentObj = objNum; const pageText = pageLines[p].map((line, idx) => { const escaped = line.replace(/\\/g, '\\\\').replace(/\(/g, '\\(').replace(/\)/g, '\\)'); const y = 780 - idx * 14; return 'BT /F1 11 Tf 40 ' + y + ' Td (' + escaped + ') Tj ET'; }).join('\n'); const streamBytes = new TextEncoder().encode(pageText); objects.push(objNum + ' 0 obj\n<< /Length ' + streamBytes.length + ' >>\nstream\n' + pageText + '\nendstream\nendobj\n'); objNum++; const pageObj = objNum; pageRefs.push(pageObj + ' 0 R'); objects.push(objNum + ' 0 obj\n<< /Type /Page /Parent ' + pagesObj + ' 0 R /MediaBox [0 0 612 792] /Contents ' + contentObj + ' 0 R /Resources << /Font << /F1 ' + fontObj + ' 0 R >> >> >>\nendobj\n'); objNum++; } // Insert pages object objects.splice(1, 0, pagesObj + ' 0 obj\n<< /Type /Pages /Kids [' + pageRefs.join(' ') + '] /Count ' + 
pageLines.length + ' >>\nendobj\n'); // Assemble PDF let body = ''; const offsets = []; for (const obj of objects) { offsets.push(pdf.length + body.length); body += obj; } pdf += body; // xref const xrefOffset = pdf.length; let xref = 'xref\n0 ' + (objects.length + 1) + '\n0000000000 65535 f \n'; for (const off of offsets) { xref += String(off).padStart(10, '0') + ' 00000 n \n'; } pdf += xref; pdf += 'trailer\n<< /Size ' + (objects.length + 1) + ' /Root 1 0 R >>\nstartxref\n' + xrefOffset + '\n%%EOF'; return new Blob([pdf], { type: 'application/pdf' }); } function formatAsDOCX(text) { // Build minimal DOCX (ZIP containing XML) const paragraphs = text.split('\n').map(line => { const escaped = escapeXML(line); return '' + escaped + ''; }).join(''); const contentTypes = ''; const rels = ''; const document = '' + paragraphs + ''; const wordRels = ''; // Build ZIP manually const files = [ { name: '[Content_Types].xml', data: new TextEncoder().encode(contentTypes) }, { name: '_rels/.rels', data: new TextEncoder().encode(rels) }, { name: 'word/document.xml', data: new TextEncoder().encode(document) }, { name: 'word/_rels/document.xml.rels', data: new TextEncoder().encode(wordRels) }, ]; return buildZip(files); } function buildZip(files) { const parts = []; const centralParts = []; let offset = 0; for (const file of files) { const nameBytes = new TextEncoder().encode(file.name); const data = file.data; // Local file header const header = new Uint8Array(30 + nameBytes.length); const view = new DataView(header.buffer); view.setUint32(0, 0x04034b50, true); // signature view.setUint16(4, 20, true); // version needed view.setUint16(6, 0, true); // flags view.setUint16(8, 0, true); // compression (store) view.setUint16(10, 0, true); // mod time view.setUint16(12, 0, true); // mod date view.setUint32(14, crc32(data), true); // crc32 view.setUint32(18, data.length, true); // compressed size view.setUint32(22, data.length, true); // uncompressed size view.setUint16(26, 
nameBytes.length, true); // name length view.setUint16(28, 0, true); // extra length header.set(nameBytes, 30); // Central directory entry const central = new Uint8Array(46 + nameBytes.length); const cview = new DataView(central.buffer); cview.setUint32(0, 0x02014b50, true); cview.setUint16(4, 20, true); cview.setUint16(6, 20, true); cview.setUint16(8, 0, true); cview.setUint16(10, 0, true); cview.setUint16(12, 0, true); cview.setUint16(14, 0, true); cview.setUint32(16, crc32(data), true); cview.setUint32(20, data.length, true); cview.setUint32(24, data.length, true); cview.setUint16(28, nameBytes.length, true); cview.setUint16(30, 0, true); cview.setUint16(32, 0, true); cview.setUint16(34, 0, true); cview.setUint16(36, 0, true); cview.setUint32(38, 0, true); cview.setUint32(42, offset, true); central.set(nameBytes, 46); parts.push(header, data); centralParts.push(central); offset += header.length + data.length; } const centralOffset = offset; let centralSize = 0; for (const cp of centralParts) { parts.push(cp); centralSize += cp.length; } // End of central directory const eocd = new Uint8Array(22); const eview = new DataView(eocd.buffer); eview.setUint32(0, 0x06054b50, true); eview.setUint16(4, 0, true); eview.setUint16(6, 0, true); eview.setUint16(8, files.length, true); eview.setUint16(10, files.length, true); eview.setUint32(12, centralSize, true); eview.setUint32(16, centralOffset, true); eview.setUint16(20, 0, true); parts.push(eocd); return new Blob(parts, { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }); } function crc32(data) { let crc = 0xFFFFFFFF; for (let i = 0; i < data.length; i++) { crc ^= data[i]; for (let j = 0; j < 8; j++) { crc = (crc >>> 1) ^ (crc & 1 ? 0xEDB88320 : 0); } } return (crc ^ 0xFFFFFFFF) >>> 0; } // ================== // SHOW RESULT // ================== function showResult(blob, text) { const preview = text.length > 2000 ? text.substring(0, 2000) + '\n\n... 
(' + text.length + ' characters total)' : text; resultPreview.innerHTML = '
' + preview.replace(/&/g, '&').replace(//g, '>') + '
'; const words = text.split(/\s+/).filter(w => w).length; const lines = text.split('\n').filter(l => l.trim()).length; resultMeta.textContent = words + ' words \u2022 ' + lines + ' lines \u2022 ' + formatSize(blob.size) + ' (' + selectedFormat.label + ')'; const baseName = selectedFile.name.replace(/\.[^.]+$/, ''); const outputName = baseName + '-extracted.' + selectedFormat.ext; const url = URL.createObjectURL(blob); downloadBtn.href = url; downloadBtn.download = outputName; downloadBtn.textContent = '\u2B07 Download ' + outputName; resultSection.classList.add('visible'); progressBar.classList.remove('visible'); } // Desktop dropdown expandable groups document.querySelectorAll('.nav-dropdown-parent').forEach(btn => { btn.addEventListener('click', function(e) { e.stopPropagation(); this.classList.toggle('open'); const sub = this.nextElementSibling; if (sub) sub.classList.toggle('open'); }); }); // ================== // NAV (same as other pages) // ================== const hamburger = document.getElementById('hamburgerMenu'); const mobileMenu = document.getElementById('mobileNavMenu'); hamburger.addEventListener('click', () => { hamburger.classList.toggle('active'); mobileMenu.classList.toggle('active'); hamburger.setAttribute('aria-expanded', mobileMenu.classList.contains('active')); }); document.querySelectorAll('.mobile-parent').forEach(btn => { btn.addEventListener('click', () => { btn.classList.toggle('open'); const sub = btn.nextElementSibling; if (sub) sub.classList.toggle('open'); }); }); document.addEventListener('click', e => { if (!mobileMenu.contains(e.target) && !hamburger.contains(e.target)) { hamburger.classList.remove('active'); mobileMenu.classList.remove('active'); hamburger.setAttribute('aria-expanded', 'false'); } }); // ================== // VOTE / POLL // ================== var VOTE_API = 'https://us-central1-peachytechnologies-web.cloudfunctions.net/submitVote'; var VOTE_COUNTS_API = 
'https://us-central1-peachytechnologies-web.cloudfunctions.net/getVoteCounts'; var userVote = null; function showVoteResults(counts) { var total = counts.yes + counts.no; var yesPct = total > 0 ? Math.round((counts.yes / total) * 100) : 0; var noPct = total > 0 ? 100 - yesPct : 0; document.getElementById('voteYesPct').textContent = yesPct + '%'; document.getElementById('voteNoPct').textContent = noPct + '%'; document.getElementById('voteYesBar').style.width = yesPct + '%'; document.getElementById('voteNoBar').style.width = noPct + '%'; document.getElementById('voteTotalText').textContent = total >= 1000 ? total.toLocaleString() + ' votes cast' : ''; document.getElementById('voteStateNew').style.display = 'none'; document.getElementById('voteStateResults').style.display = 'block'; } document.querySelectorAll('.vote-btn').forEach(function(btn) { btn.addEventListener('click', function() { userVote = this.dataset.vote; document.getElementById('voteButtons').style.display = 'none'; document.getElementById('voteForm').style.display = 'block'; document.getElementById('voteConfirmed').textContent = userVote === 'yes' ? '\uD83D\uDC4D Great to hear!' 
: '\uD83D\uDC4E Thanks for being honest!'; }); }); document.getElementById('voteSubmit').addEventListener('click', async function() { var suggestion = document.getElementById('voteSuggestion').value.trim(); this.disabled = true; this.textContent = 'Submitting...'; try { var resp = await fetch(VOTE_API, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ vote: userVote, suggestion: suggestion || null, page: 'text-extractor', timestamp: new Date().toISOString() }) }); var data = await resp.json(); if (data.counts) showVoteResults(data.counts); } catch(e) { document.getElementById('voteStateNew').style.display = 'none'; document.getElementById('voteStateResults').style.display = 'block'; } localStorage.setItem('peachy_vote_dietary_planner', 'submitted'); }); if (localStorage.getItem('peachy_vote_dietary_planner') === 'submitted') { fetch(VOTE_COUNTS_API).then(function(r) { return r.json(); }).then(function(c) { showVoteResults(c); }) .catch(function() { document.getElementById('voteStateNew').style.display = 'none'; document.getElementById('voteStateResults').style.display = 'block'; }); } });