From 45c150c445d65218d33ed61d70a3fc3b8ba8fdab Mon Sep 17 00:00:00 2001 From: max ross Date: Mon, 11 May 2026 21:28:19 -0400 Subject: [PATCH] Add DocumentAI parsing wrappers --- src/module/DocumentAI/index.ts | 68 ++++++++++++- src/module/DocumentAI/type.ts | 178 +++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+), 1 deletion(-) diff --git a/src/module/DocumentAI/index.ts b/src/module/DocumentAI/index.ts index 06f68f4..ed539a9 100644 --- a/src/module/DocumentAI/index.ts +++ b/src/module/DocumentAI/index.ts @@ -2,15 +2,81 @@ import { makeFormData } from 'koajax'; import { BaseModel, RESTClient, toggle } from 'mobx-restful'; import { LarkData } from '../../type'; -import { TaxiInvoice, TrainInvoice, VatInvoice, VehicleInvoice } from './type'; +import { + BankCard, + ContractFieldExtraction, + ContractOCRMode, + Resume, + TaxiInvoice, + TrainInvoice, + VatInvoice, + VehicleInvoice +} from './type'; export * from './type'; export abstract class DocumentAIModel extends BaseModel { baseURI = 'document_ai/v1'; + ocrBaseURI = 'optical_char_recognition/v1'; abstract client: RESTClient; + /** + * @see {@link https://open.feishu.cn/document/server-docs/ai/optical_char_recognition-v1/basic_recognize} + */ + @toggle('uploading') + async recognizeImageText(image: string) { + const { body } = await this.client.post>( + `${this.ocrBaseURI}/image/basic_recognize`, + { image } + ); + + return body!.data!.text_list; + } + + /** + * @see {@link https://open.feishu.cn/document/ai/document_ai-v1/bank_card/recognize} + */ + @toggle('uploading') + async recognizeBankCard(file: File) { + const { body } = await this.client.post>( + `${this.baseURI}/bank_card/recognize`, + makeFormData({ file }) + ); + + return body!.data!.bank_card; + } + + /** + * @see {@link https://open.feishu.cn/document/ai/document_ai-v1/resume/parse} + */ + @toggle('uploading') + async parseResumes(file: File) { + const { body } = await this.client.post>( + `${this.baseURI}/resume/parse`, + makeFormData({ file }) + ); + + return body!.data!.resumes; + } + + /** + * @see {@link https://open.feishu.cn/document/server-docs/ai/document_ai-v1/contract/field_extraction} + */ + @toggle('uploading') + async extractContractFields( + file: File, + pdf_page_limit = 100, + ocr_mode: ContractOCRMode = 'auto' + ) { + const { body } = await this.client.post>( + `${this.baseURI}/contract/field_extraction`, + makeFormData({ file, pdf_page_limit, ocr_mode }) + ); + + return body!.data!; + } + /** * @see {@link https://open.feishu.cn/document/ai/document_ai-v1/vat_invoice/recognize} */ diff --git a/src/module/DocumentAI/type.ts b/src/module/DocumentAI/type.ts index fdb682e..332ae42 100644 --- a/src/module/DocumentAI/type.ts +++ b/src/module/DocumentAI/type.ts @@ -1,3 +1,8 @@ +export interface DocumentAIEntity { + type: T; + value: string; +} + export type InvoiceEntityType = | `invoice_${'code' | 'no' | 'special_seal'}` | `seller_${'name' | 'taxpayer_no'}_in_seal`; @@ -67,3 +72,176 @@ export interface VehicleInvoice { | `total_price${'' | '_little'}`; value: string; } + +export type BankCardEntityType = 'card_number' | 'date_of_expiry'; + +export interface BankCard { + entities: DocumentAIEntity[]; +} + +export interface ResumeEducation { + school: string; + start_date: string; + start_time: string; + end_date: string; + end_time: string; + major: string; + degree: string; + qualification: number; +} + +export interface ResumeCareer { + company: string; + start_date: string; + start_time: string; + end_date: string; + end_time: string; + title: string; + type: number; + type_str: string; + job_description: string; +} + +export interface ResumeProject { + name: string; + title: string; + start_date: string; + start_time: string; + end_date: string; + end_time: string; + description: string; +} + +export interface ResumeLanguage { + level: number; + description: string; +} + +export interface ResumeAward { + award: string; + date: string; + description: string; +} + +export interface ResumeCertificate { + name: string; + desc: string; +} + +export interface ResumeCompetition { + name: string; + desc: string; +} + +export interface Resume { + file_md5: string; + content: string; + new_content: string; + name: string; + email: string; + mobile: string; + mobile_is_virtual: boolean; + country_code: string; + educations: ResumeEducation[]; + careers: ResumeCareer[]; + projects: ResumeProject[]; + work_year: number; + date_of_birth: string; + gender: number; + willing_positions: string[]; + current_location: string; + willing_locations: string[]; + home_location: string; + languages: ResumeLanguage[]; + awards: ResumeAward[]; + certificates: ResumeCertificate[]; + competitions: ResumeCompetition[]; + self_evaluation: string; + urls: string[]; + social_links: string[]; +} + +export type ContractOCRMode = 'force' | 'auto' | 'unused'; + +export interface ContractExtractPrice { + contract_price: number; + contract_price_original: string; + text: string; +} + +export interface ContractExtractTerm { + initial_time: string; + initial_unit: string; +} + +export interface ContractExtractTime { + time_start: string; + time_end: string; + original_time_start: string; + original_time_end: string; + text_start: string; + text_end: string; + initial_term: ContractExtractTerm; + text_initial_term: string; +} + +export interface ContractExtractCopy { + copy_num: number; + original_copy: string; + key: string; + text: string; +} + +export interface ContractExtractCurrency { + currency_name: string; + currency_text: string; +} + +export interface ContractBodyEntity { + address: string; + contacts: string; + email: string; + phone: string; + id_number: string; + legal_representative: string; + party: string; +} + +export type ContractPartyType = 'buy' | 'sell' | 'third'; + +export interface ContractBodyInfo { + body_type: ContractPartyType; + value: ContractBodyEntity; +} + +export interface ContractBankEntity { + account_name: string; + bank_name: string; + account_number: string; + phone: string; + contacts: string; + tax_number: string; + address: string; + id_number: string; + email: string; +} + +export interface ContractBankInfo { + bank_type: + | ContractPartyType + | `${ContractPartyType}_bank` + | 'uncertain_bank' + | 'unceratin_bank'; + value: ContractBankEntity; +} + +export interface ContractFieldExtraction { + file_id: string; + price: ContractExtractPrice; + time: ContractExtractTime; + copy: ContractExtractCopy; + currency: ContractExtractCurrency; + header: string; + body_info: ContractBodyInfo[]; + bank_info: ContractBankInfo[]; +}