fix: fields content language detection not working properly

This commit is contained in:
Fabio Di Stasio 2022-07-09 12:39:44 +02:00
parent 7537dff401
commit a91fa8ff54
4 changed files with 195 additions and 28 deletions

14
package-lock.json generated
View File

@ -14,7 +14,6 @@
"@faker-js/faker": "~6.1.2",
"@mdi/font": "~6.9.96",
"@turf/helpers": "~6.5.0",
"@vscode/vscode-languagedetection": "~1.0.21",
"@vueuse/core": "~8.7.5",
"ace-builds": "~1.4.13",
"better-sqlite3": "~7.5.1",
@ -3096,14 +3095,6 @@
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
}
},
"node_modules/@vscode/vscode-languagedetection": {
"version": "1.0.21",
"resolved": "https://registry.npmjs.org/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.21.tgz",
"integrity": "sha512-zSUH9HYCw5qsCtd7b31yqkpaCU6jhtkKLkvOOA8yTrIRfBSOFb8PPhgmMicD7B/m+t4PwOJXzU1XDtrM9Fd3/g==",
"bin": {
"vscode-languagedetection": "cli/index.js"
}
},
"node_modules/@vue/compiler-core": {
"version": "3.2.37",
"resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.2.37.tgz",
@ -19988,11 +19979,6 @@
}
}
},
"@vscode/vscode-languagedetection": {
"version": "1.0.21",
"resolved": "https://registry.npmjs.org/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.21.tgz",
"integrity": "sha512-zSUH9HYCw5qsCtd7b31yqkpaCU6jhtkKLkvOOA8yTrIRfBSOFb8PPhgmMicD7B/m+t4PwOJXzU1XDtrM9Fd3/g=="
},
"@vue/compiler-core": {
"version": "3.2.37",
"resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.2.37.tgz",

View File

@ -119,7 +119,6 @@
"@faker-js/faker": "~6.1.2",
"@mdi/font": "~6.9.96",
"@turf/helpers": "~6.5.0",
"@vscode/vscode-languagedetection": "~1.0.21",
"@vueuse/core": "~8.7.5",
"ace-builds": "~1.4.13",
"better-sqlite3": "~7.5.1",

View File

@ -0,0 +1,182 @@
/**
 * Tells whether a string is a JSON document whose top-level value is an
 * object or an array.
 *
 * Bare scalars (`42`, `"text"`, `true`) are rejected up front: the first
 * non-blank character has to be `{` or `[` before a parse is attempted.
 *
 * @param str Raw content to inspect.
 * @returns `true` when `str` parses as a JSON object/array, `false` otherwise
 *          (including when anything in the check throws).
 */
function isJSON (str: string) {
   try {
      // Cheap pre-filter so scalar JSON values are never reported as JSON.
      const opener = str.trim().charAt(0);
      if (opener !== '{' && opener !== '[')
         return false;

      // The parse result is irrelevant; only success or failure matters.
      JSON.parse(str);
      return true;
   }
   catch (_) {
      return false;
   }
}
/** Lower-case names of the elements whose presence is taken as evidence of HTML markup. */
const HTML_TAGS = [
   'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio',
   'b', 'base', 'bdi', 'bdo', 'blockquote', 'body', 'br', 'button',
   'canvas', 'caption', 'cite', 'code', 'col', 'colgroup',
   'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'div', 'dl', 'dt',
   'em', 'embed',
   'fieldset', 'figcaption', 'figure', 'footer', 'form',
   'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html',
   'i', 'iframe', 'img', 'input', 'ins',
   'kbd',
   'label', 'legend', 'li', 'link',
   'main', 'map', 'mark', 'math', 'menu', 'menuitem', 'meta', 'meter',
   'nav', 'noscript',
   'object', 'ol', 'optgroup', 'option', 'output',
   'p', 'param', 'picture', 'pre', 'progress',
   'q',
   'rb', 'rp', 'rt', 'rtc', 'ruby',
   's', 'samp', 'script', 'section', 'select', 'slot', 'small', 'source', 'span',
   'strong', 'style', 'sub', 'summary', 'sup', 'svg',
   'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'time',
   'title', 'tr', 'track',
   'u', 'ul',
   'var', 'video',
   'wbr'
];

/**
 * Matches the opening tag of any known element, in any letter case, with or
 * without attributes and with an optional self-closing slash:
 * `<p>`, `<DIV>`, `<a href="...">`, `<br/>`, `<img src="..." />`.
 *
 * The tag name must be followed by whitespace, `/` or `>`, so `<ab>` does not
 * match through the `a` alternative.
 */
const HTML_TAG_PATTERN = new RegExp(`<(?:${HTML_TAGS.join('|')})(?:\\s[^>]*)?/?>`, 'i');

/**
 * Tells whether a string looks like HTML markup.
 *
 * Two conditions must both hold:
 *  1. the text contains the opening tag of a known HTML element
 *     (see `HTML_TAG_PATTERN`);
 *  2. parsing the text as `text/html` yields at least one element node
 *     directly under `<body>`.
 *
 * @param str Raw content to inspect.
 * @returns `true` when both conditions hold, `false` otherwise.
 */
function isHTML (str: string) {
   // Run the cheap textual check first so content without any known tag never
   // pays for a DOM parse.
   if (!HTML_TAG_PATTERN.test(str))
      return false;

   const doc = new DOMParser().parseFromString(str, 'text/html');

   // nodeType 1 is an element node; text and comment nodes do not count.
   return Array.from(doc.body.childNodes).some(node => node.nodeType === 1);
}
/**
 * Tells whether a string is well-formed XML.
 *
 * The text is parsed as `text/xml`; the result is then searched for a
 * `parsererror` element, whose presence is treated as a failed parse.
 *
 * @param str Raw content to inspect.
 * @returns `true` when no `parsererror` element is found in the parsed document.
 */
function isXML (str: string) {
   const parsed = new DOMParser().parseFromString(str, 'text/xml');
   const hasParserError = Boolean(parsed.querySelector('parsererror'));

   return !hasParserError;
}
/**
 * Inline markdown markers; finding any of them anywhere in the text counts:
 * code span, bold (`**` / `__`), strikethrough, link target, image and
 * task-list checkboxes.
 */
const MD_INLINE_MARKERS = [
   '`',
   '**',
   '__',
   '~~',
   '](http',
   '![',
   '[ ]',
   '[x]'
];

/**
 * Block-level markdown markers, which only count at the start of a line
 * (optionally indented): ATX heading (`# `, `## `, ...), bullet item
 * (`- `, `+ `, `* `), ordered item (`1. `, `2. `, ...) and nested
 * blockquote (`>> `).
 *
 * Anchoring to the line start keeps ordinary text such as `1 + 2 = 3`,
 * `rock - paper` or `C# rocks` from being mistaken for markdown.
 */
const MD_BLOCK_MARKER = /^[ \t]*(?:#+ |[-+*] |\d+\. |>{2,} )/m;

/**
 * Heuristically tells whether a string contains markdown syntax.
 *
 * @param str Raw content to inspect.
 * @returns `true` when a line starts with a block marker or the text contains
 *          any inline marker, `false` otherwise.
 */
function isMarkdown (str: string) {
   if (MD_BLOCK_MARKER.test(str))
      return true;

   return MD_INLINE_MARKERS.some((marker) => str.includes(marker));
}
/**
 * Guesses the language of a piece of text.
 *
 * Detectors run in a fixed order (JSON, HTML, XML, markdown) and the first
 * one that accepts the text decides the result. Blank content, and content
 * no detector accepts, is reported as plain text.
 *
 * @param str Raw content to classify.
 * @returns One of `'json'`, `'html'`, `'xml'`, `'markdown'` or `'text'`.
 */
export function langDetector (str: string) {
   // Nothing but whitespace: there is nothing to inspect.
   if (str.trim() === '')
      return 'text';

   // Order matters: each detector is only consulted when all earlier ones declined.
   const detectors = [
      ['json', isJSON],
      ['html', isHTML],
      ['xml', isXML],
      ['markdown', isMarkdown]
   ] as const;

   for (const [mode, accepts] of detectors) {
      if (accepts(str))
         return mode;
   }

   return 'text';
}

View File

@ -194,9 +194,9 @@
import { computed, onBeforeUnmount, Prop, ref, Ref, watch, nextTick } from 'vue';
import { useI18n } from 'vue-i18n';
import * as moment from 'moment';
import { ModelOperations } from '@vscode/vscode-languagedetection';
import { mimeFromHex } from 'common/libs/mimeFromHex';
import { formatBytes } from 'common/libs/formatBytes';
import { langDetector } from 'common/libs/langDetector';
import { bufferToBase64 } from 'common/libs/bufferToBase64';
import hexToBinary, { HexChar } from 'common/libs/hexToBinary';
import {
@ -604,19 +604,19 @@ watch(() => props.fields, () => {
});
watch(isTextareaEditor, (val) => {
if (val) {
const modelOperations = new ModelOperations();
(async () => {
const detected = await modelOperations.runModel(editingContent.value);
const filteredLanguages = detected.filter(dLang =>
availableLanguages.value.some(aLang => aLang.id === dLang.languageId) &&
dLang.confidence > 0.1
);
if (val)
editorMode.value = langDetector(editingContent.value);
// const modelOperations = new ModelOperations();
// (async () => {
// const detected = await modelOperations.runModel(editingContent.value);
// const filteredLanguages = detected.filter(dLang =>
// availableLanguages.value.some(aLang => aLang.id === dLang.languageId) &&
// dLang.confidence > 0.1
// );
if (filteredLanguages.length)
editorMode.value = availableLanguages.value.find(lang => lang.id === filteredLanguages[0].languageId).slug;
})();
}
// if (filteredLanguages.length)
// editorMode.value = availableLanguages.value.find(lang => lang.id === filteredLanguages[0].languageId).slug;
// })();
});
watch(() => props.selected, (isSelected) => {