This repository has been archived by the owner on Jul 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 117
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
31 changed files
with
1,055 additions
and
192 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import { v4 as uuid } from "uuid"; | ||
|
||
/**
 * Free-form key/value bag attached to every document.
 *
 * Values are intentionally untyped (`any`) so that loaders can attach
 * arbitrary data; narrow a value before relying on its shape.
 */
export interface Metadata {
  [key: string]: any;
}
|
||
/**
 * Base class for all document kinds; it only carries metadata.
 */
export class Document {
  /** Metadata supplied at construction time, or an empty object when none was given. */
  metadata: Metadata;

  /**
   * @param metadata - Optional metadata. The object is stored by reference
   *   (not copied); when omitted, a fresh empty object is used.
   */
  constructor(metadata?: Metadata) {
    this.metadata = metadata ?? {};
  }
}
|
||
/**
 * A document whose payload is plain text.
 */
export class TextDocument extends Document {
  /** The text content of the document. */
  text: string;
  /** Optional embedding vector; never set by this class itself. */
  embedding?: number[];

  /**
   * @param text - Text content to store.
   * @param metadata - Optional metadata forwarded to {@link Document}.
   */
  constructor(text: string, metadata?: Metadata) {
    super(metadata);
    this.text = text;
  }
}
|
||
/**
 * A document whose payload is the raw bytes of an image.
 */
export class ImageDocument extends Document {
  /** Raw image bytes (blob). */
  image: Uint8Array;
  /** Optional embedding vector; never set by this class itself. */
  embedding?: number[];

  /**
   * @param image - Raw image bytes to store (kept by reference, not copied).
   * @param metadata - Optional metadata forwarded to {@link Document}.
   */
  constructor(image: Uint8Array, metadata?: Metadata) {
    super(metadata);
    this.image = image;
  }
}
|
||
/**
 * A document whose payload is the raw bytes of an audio clip.
 */
export class AudioDocument extends Document {
  /** Raw audio bytes (blob). */
  audio: Uint8Array;
  /** Optional embedding vector; never set by this class itself. */
  embedding?: number[];

  /**
   * @param audio - Raw audio bytes to store (kept by reference, not copied).
   * @param metadata - Optional metadata forwarded to {@link Document}.
   */
  constructor(audio: Uint8Array, metadata?: Metadata) {
    super(metadata);
    this.audio = audio;
  }
}
|
||
/**
 * Example of a multi-modal document: it pairs one text document with one
 * image document under a shared metadata object.
 *
 * Not exported; it serves as an illustration of how to compose documents.
 */
class MultiModalDocument extends Document {
  /** The textual half of the pair. */
  textDoc: TextDocument;
  /** The image half of the pair. */
  imageDoc: ImageDocument;

  /**
   * @param textDoc - Text document to embed in this composite.
   * @param imageDoc - Image document to embed in this composite.
   * @param metadata - Optional metadata for the composite itself, forwarded
   *   to {@link Document}; the embedded documents keep their own metadata.
   */
  constructor(
    textDoc: TextDocument,
    imageDoc: ImageDocument,
    metadata?: Metadata
  ) {
    super(metadata);
    this.textDoc = textDoc;
    this.imageDoc = imageDoc;
  }
}
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,66 @@ | ||
import { v4 as uuid } from "uuid"; | ||
import fs from "fs"; | ||
import { | ||
AudioDocument, | ||
Document, | ||
ImageDocument, | ||
TextDocument, | ||
} from "src/documents/Document"; | ||
import { Loader } from "."; | ||
|
||
/**
 * Loads files (and, recursively, directories) from the local file system and
 * wraps each file in the document class that matches its extension.
 *
 * Extension mapping (matched case-insensitively):
 * - `png`, `jpg`, `jpeg`, `svg` -> {@link ImageDocument}
 * - `wav`, `mp4`, `mp3` -> {@link AudioDocument}
 * - anything else (including `txt`, `csv`, `json`) -> {@link TextDocument}
 */
export class FileLoader implements Loader {
  private static readonly IMAGE_EXTENSIONS: ReadonlySet<string> = new Set([
    "png",
    "jpg",
    "jpeg",
    "svg",
  ]);

  private static readonly AUDIO_EXTENSIONS: ReadonlySet<string> = new Set([
    "wav",
    "mp4",
    "mp3",
  ]);

  /**
   * Loads every given path. Directories are expanded recursively; their
   * entries are joined to the directory path with `/`.
   *
   * Loading is best-effort: a path that cannot be read is reported through
   * `console.error` and skipped, so this method does not reject because of a
   * single bad path.
   *
   * @param filepaths - File or directory paths to load.
   * @returns The loaded documents, in the order the paths were visited. Each
   *   document's metadata has `source` set to the file path.
   */
  async load(filepaths: string[]): Promise<Document[]> {
    const documents: Document[] = [];

    for (const filepath of filepaths) {
      try {
        const stat = await fs.promises.stat(filepath);

        if (stat.isDirectory()) {
          const entries = await fs.promises.readdir(filepath);
          const nested = await this.load(
            entries.map((entry) => `${filepath}/${entry}`)
          );
          for (const doc of nested) {
            documents.push(doc);
          }
        } else {
          documents.push(await this.loadFile(filepath));
        }
      } catch (error) {
        // Deliberate best-effort: log and continue with the remaining paths.
        console.error(`Error loading file ${filepath}: ${error}`);
      }
    }

    return documents;
  }

  /**
   * Reads one file and wraps it in the document class for its extension.
   *
   * @throws Error when the text after the last `.` of the file name is empty
   *   (for example a name ending in a dot). A name without any dot is treated
   *   as its own extension and therefore loads as text.
   */
  private async loadFile(filepath: string): Promise<Document> {
    const data = await fs.promises.readFile(filepath);
    const filename = filepath.split("/").pop();
    // Lower-cased so that "PHOTO.JPG" is classified like "photo.jpg".
    const extension = filename?.split(".").pop()?.toLowerCase();
    if (!extension) {
      throw new Error(`No extension found for file ${filepath}`);
    }

    const metadata = { source: filepath };
    if (FileLoader.IMAGE_EXTENSIONS.has(extension)) {
      return new ImageDocument(data, metadata);
    }
    if (FileLoader.AUDIO_EXTENSIONS.has(extension)) {
      return new AudioDocument(data, metadata);
    }
    return new TextDocument(data.toString(), metadata);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import { Octokit } from "@octokit/rest"; | ||
import { TextDocument } from "src/documents/Document"; | ||
import { Loader } from "."; | ||
|
||
/**
 * Loads files from a GitHub repository as {@link TextDocument}s using
 * Octokit's `repos.getContent` endpoint.
 */
export class GithubLoader implements Loader {
  private octokit: Octokit;

  /**
   * @param token - Passed to Octokit as its `auth` option.
   */
  constructor(token: string) {
    this.octokit = new Octokit({ auth: token });
  }

  /**
   * Loads the file at `path`, or every file below it when `path` is a
   * directory (directories are traversed recursively, entries in parallel).
   *
   * @param repo - Repository in `owner/name` form.
   * @param path - Path of a file or directory inside the repository.
   * @param ref - Optional ref forwarded to the API as `ref`.
   * @returns One document per file; `metadata.source` is the file's
   *   `github.com/.../blob/...` URL.
   * @throws Error when `repo` is not of the form `owner/name`; API failures
   *   propagate from Octokit unchanged.
   */
  async load(repo: string, path: string, ref?: string): Promise<TextDocument[]> {
    const [owner, name] = repo.split("/");
    if (!owner || !name) {
      throw new Error(`Invalid repository "${repo}": expected "owner/name"`);
    }

    const { data } = await this.octokit.repos.getContent({
      owner,
      repo: name,
      path,
      ref,
    });

    if (Array.isArray(data)) {
      // Directory listing: load each entry and flatten the per-entry results.
      const nested = await Promise.all(
        data.map((item: { path: string }) => this.load(repo, item.path, ref))
      );
      return nested.flat();
    }

    // Single entry: the response already carries the base64 content, so no
    // second request is needed. Entries without content yield empty text.
    const encoded = (data as { content?: unknown }).content;
    const text =
      typeof encoded === "string"
        ? Buffer.from(encoded, "base64").toString()
        : "";
    // Without a ref, link to HEAD rather than emitting "blob/undefined/".
    const source = `https://github.com/${repo}/blob/${ref || "HEAD"}/${path}`;
    return [new TextDocument(text, { source })];
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import { google } from "googleapis"; | ||
import { TextDocument } from "src/documents/Document"; | ||
import { Loader } from "."; | ||
|
||
/** The fields of a Gmail message part that this loader reads. */
interface GmailMessagePart {
  body?: { data?: string | null } | null;
  parts?: GmailMessagePart[] | null;
}

/**
 * Loads a single Gmail message as a {@link TextDocument} through the
 * `googleapis` Gmail v1 client.
 */
export class GmailLoader implements Loader {
  private gmail: any;

  /**
   * @param credentials - OAuth client settings; `client_id`, `client_secret`
   *   and the first entry of `redirect_uris` are read.
   * @param token - Credentials handed to `OAuth2.setCredentials`.
   */
  constructor(credentials: any, token: any) {
    const auth = new google.auth.OAuth2(
      credentials.client_id,
      credentials.client_secret,
      credentials.redirect_uris[0]
    );
    auth.setCredentials(token);
    this.gmail = google.gmail({ version: "v1", auth });
  }

  /**
   * Fetches the message in `full` format for user `me` and concatenates the
   * decoded body data of every part, in document order.
   *
   * @param messageId - Id of the message to fetch.
   * @returns A single-element array. The document's metadata holds `source`
   *   (an inbox URL built from `messageId`) and `headers` (header name to
   *   value; a repeated header name keeps its last value).
   */
  async load(messageId: string): Promise<TextDocument[]> {
    const { data: message } = await this.gmail.users.messages.get({
      userId: "me",
      id: messageId,
      format: "full",
    });
    const payload = message?.payload ?? {};

    const text = GmailLoader.collectBodyData(payload)
      // "base64" in Node also accepts the URL-safe alphabet Gmail uses.
      .map((encoded) => Buffer.from(encoded, "base64").toString("utf8"))
      .join("");

    const headers: Record<string, unknown> = {};
    for (const header of payload.headers ?? []) {
      headers[header.name] = header.value;
    }

    return [
      new TextDocument(text, {
        source: `https://mail.google.com/mail/u/0/#inbox/${messageId}`,
        headers,
      }),
    ];
  }

  /**
   * Collects the encoded body strings of `part` and all of its descendants,
   * depth-first. Parts without body data (for example containers) contribute
   * nothing themselves.
   */
  private static collectBodyData(
    part: GmailMessagePart | null | undefined
  ): string[] {
    if (!part) {
      return [];
    }
    const data = part.body?.data;
    const own = typeof data === "string" ? [data] : [];
    const nested = (part.parts ?? []).flatMap((child) =>
      GmailLoader.collectBodyData(child)
    );
    return [...own, ...nested];
  }
}
Oops, something went wrong.