fix: add progress bar for OCR (#1444)

This commit is contained in:
Nolan Lawson 2019-08-27 23:23:35 -07:00 committed by GitHub
parent c822f19975
commit c8738f17b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 5 deletions

View File

@ -35,6 +35,11 @@
{/if}
</span>
</button>
<LengthGauge
length={extractionProgress}
overLimit={false}
max={100}
/>
</div>
<style>
.media-alt-editor {
@ -124,7 +129,8 @@
rawText: '',
mediaAltCharLimit: MEDIA_ALT_CHAR_LIMIT,
extracting: false,
className: ''
className: '',
extractionProgress: 0
}),
computed: {
length: ({ rawText }) => length(rawText || ''),
@ -176,17 +182,22 @@
this.set({ extracting: true })
try {
const { url } = this.get()
const onProgress = progress => {
requestAnimationFrame(() => {
this.set({ extractionProgress: progress * 100 })
})
}
const file = mediaUploadFileCache.get(url)
let text
if (file) { // Avoid downloading from the network a file that the user *just* uploaded
const fileUrl = URL.createObjectURL(file)
try {
text = await runTesseract(fileUrl)
text = await runTesseract(fileUrl, onProgress)
} finally {
URL.revokeObjectURL(fileUrl)
}
} else {
text = await runTesseract(url)
text = await runTesseract(url, onProgress)
}
const { media, index, realm } = this.get()
if (media[index].description !== text) {
@ -201,6 +212,11 @@
)
} finally {
this.set({ extracting: false })
setTimeout(() => {
requestAnimationFrame(() => {
this.set({ extractionProgress: 0 })
})
}, 400)
}
}
},

View File

@ -1,6 +1,25 @@
import { importTesseractWorker } from '../_utils/asyncModules'
export async function runTesseract (url) {
// TODO: it's flaky to try to estimate tesseract's total progress this way
// Ordered list of the progress statuses we track, each paired with the share
// of the overall progress assigned to it. The proportions add up to 1.
const steps = [
  { status: 'loading tesseract core', proportion: 0.05 },
  { status: 'initializing tesseract', proportion: 0.05 },
  { status: 'loading language traineddata', proportion: 0.1 },
  { status: 'initializing api', proportion: 0.2 },
  { status: 'recognizing text', proportion: 0.6 }
]

/**
 * Convert a per-step progress report into an overall progress estimate.
 *
 * Every step listed before the reported one in `steps` is counted as fully
 * complete; the reported step contributes its proportion scaled by
 * `progressInfo.progress`.
 *
 * @param {{ status: string, progress: number }} progressInfo - the step being
 *   reported and how far along that step is (expected to be between 0 and 1)
 * @returns {number} estimated overall progress between 0 and 1; 0 when the
 *   status is not one of the known steps
 */
function getTotalProgress (progressInfo) {
  const idx = steps.findIndex(({ status }) => progressInfo.status === status)
  if (idx === -1) {
    // Unknown status: there is no step to attribute the progress to, so report
    // nothing instead of throwing on `steps[-1]`.
    return 0
  }
  // Sum of the proportions of all steps that come before the current one.
  const completed = steps
    .slice(0, idx)
    .reduce((sum, { proportion }) => sum + proportion, 0)
  // Guard against a missing/non-numeric or out-of-range fraction so the result
  // is never NaN and never leaves the [0, 1] range.
  const raw = Number(progressInfo.progress)
  const fraction = Number.isFinite(raw) ? Math.min(1, Math.max(0, raw)) : 0
  return completed + steps[idx].proportion * fraction
}
export async function runTesseract (url, onProgress) {
const worker = await importTesseractWorker()
// TODO: have to trick tesseract into not creating a blob URL because that would break our CSP
@ -13,7 +32,12 @@ export async function runTesseract (url) {
} finally {
window.Blob = OldBlob
}
promise.progress(_ => console.log('progress', _))
promise.progress(progressInfo => {
console.log('progress', progressInfo)
if (onProgress && steps.find(({ status }) => status === progressInfo.status)) {
onProgress(getTotalProgress(progressInfo))
}
})
const res = await promise
return res.text
}