Facts with cores, de-dupe and weight (#1483)
Issue: #1422

## TODOs
- [x] add support for Core facts: `Fundamental personal information like age, city of residence, marital status, and health`
- [x] ~~de-dupe facts (already have)~~
- [x] facts with weight: respond with the top facts based on weight, and populate the LLM context based on weight as well
- [x] limit to the top x non-duplicated facts (x < 200), not 5000; too many facts is worthless
- [x] ~~`minor` only extract facts from the user's transcription, or speaker 0 (already have)~~
- [x] `minor` push a notification to the user when new facts are found so that they can evaluate them

## Deploy steps
- [ ] Merge PR
- [ ] Create index on facts (#1483 (comment); see the index sketch after this list)
- [ ] Run the migration: backend > scripts > rag > facts.py > `script_migrate_fact_scoring`
- [ ] Deploy backend
- [ ] Deploy pusher
- [ ] Deploy app
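
For the "Create index on facts" step: the updated `get_facts` query filters on `deleted` and orders by `scoring` and `created_at` (both descending), so Firestore needs a composite index over those three fields. A minimal sketch of what that could look like in `firestore.indexes.json`; this is an assumption derived from the query, and the definition linked in #1483 (comment), or the index link Firestore prints when the query first fails, should take precedence:

```json
{
  "indexes": [
    {
      "collectionGroup": "facts",
      "queryScope": "COLLECTION",
      "fields": [
        { "fieldPath": "deleted", "order": "ASCENDING" },
        { "fieldPath": "scoring", "order": "DESCENDING" },
        { "fieldPath": "created_at", "order": "DESCENDING" }
      ]
    }
  ]
}
```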
beastoin authored Dec 6, 2024
2 parents 57e18d8 + 44df761 commit e25db62
Showing 11 changed files with 129 additions and 16 deletions.
4 changes: 2 additions & 2 deletions app/lib/backend/http/api/facts.dart
@@ -20,9 +20,9 @@ Future<bool> createFact(String content, FactCategory category) async {
return response.statusCode == 200;
}

Future<List<Fact>> getFacts({int limit = 5000, int offset = 0}) async {
Future<List<Fact>> getFacts({int limit = 100, int offset = 0}) async {
var response = await makeApiCall(
url: '${Env.apiBaseUrl}v1/facts', // limit=$limit&offset=$offset
url: '${Env.apiBaseUrl}v2/facts?limit=${limit}&offset=${offset}',
headers: {},
method: 'GET',
body: '',
10 changes: 7 additions & 3 deletions app/lib/pages/home/page.dart
@@ -7,20 +7,21 @@ import 'package:friend_private/backend/http/api/users.dart';
import 'package:friend_private/backend/preferences.dart';
import 'package:friend_private/backend/schema/geolocation.dart';
import 'package:friend_private/main.dart';
import 'package:friend_private/pages/apps/page.dart';
import 'package:friend_private/pages/chat/page.dart';
import 'package:friend_private/pages/facts/page.dart';
import 'package:friend_private/pages/home/widgets/chat_apps_dropdown_widget.dart';
import 'package:friend_private/pages/home/widgets/speech_language_sheet.dart';
import 'package:friend_private/pages/memories/page.dart';
import 'package:friend_private/pages/apps/page.dart';
import 'package:friend_private/pages/settings/page.dart';
import 'package:friend_private/providers/app_provider.dart';
import 'package:friend_private/providers/capture_provider.dart';
import 'package:friend_private/providers/connectivity_provider.dart';
import 'package:friend_private/providers/device_provider.dart';
import 'package:friend_private/providers/home_provider.dart';
import 'package:friend_private/providers/memory_provider.dart' as mp;
import 'package:friend_private/providers/memory_provider.dart';
import 'package:friend_private/providers/message_provider.dart';
import 'package:friend_private/providers/app_provider.dart';
import 'package:friend_private/services/notifications.dart';
import 'package:friend_private/utils/analytics/analytics_manager.dart';
import 'package:friend_private/utils/analytics/mixpanel.dart';
@@ -116,7 +117,10 @@ class _HomePageState extends State<HomePage> with WidgetsBindingObserver, Ticker
}

///Screens with respect to subpage
final Map<String, Widget> screensWithRespectToPath = {'/settings': const SettingsPage()};
final Map<String, Widget> screensWithRespectToPath = {
'/settings': const SettingsPage(),
'/facts': const FactsPage(),
};
bool? previousConnection;

void _onReceiveTaskData(dynamic data) async {
10 changes: 8 additions & 2 deletions app/lib/services/notifications.dart
@@ -173,6 +173,11 @@ class NotificationService {
// Plugin
if (data.isNotEmpty) {
late Map<String, String> payload = <String, String>{};
payload.addAll({
"navigate_to": data['navigate_to'] ?? "",
});

// plugin, daily summary
final notificationType = data['notification_type'];
if (notificationType == 'plugin' || notificationType == 'daily_summary') {
data['from_integration'] = data['from_integration'] == 'true';
@@ -244,7 +249,6 @@ class NotificationUtil {
}

static Future<void> onActionReceivedMethodImpl(ReceivedAction receivedAction) async {
debugPrint("onActionReceivedMethodImpl");
if (receivedAction.payload == null || receivedAction.payload!.isEmpty) {
return;
}
@@ -265,8 +269,11 @@

// Always ensure that all plugins was initialized
WidgetsFlutterBinding.ensureInitialized();

if (payload.containsKey('navigateTo')) {
SharedPreferencesUtil().subPageToShowFromNotification = payload['navigateTo'] ?? '';
} else if (payload.containsKey('navigate_to')) {
SharedPreferencesUtil().subPageToShowFromNotification = payload['navigate_to'] ?? '';
}

// Notification page
@@ -281,7 +288,6 @@

SharedPreferencesUtil().pageToShowFromNotification = pageIdx;

debugPrint("onActionReceivedMethodImpl ${SharedPreferencesUtil().pageToShowFromNotification}");
MyApp.navigatorKey.currentState?.pushReplacement(
MaterialPageRoute(builder: (context) => const HomePageWrapper(openAppFromNotification: true)));
}
15 changes: 13 additions & 2 deletions backend/database/facts.py
@@ -11,15 +11,26 @@ def get_facts(uid: str, limit: int = 100, offset: int = 0):
print('get_facts', uid, limit, offset)
facts_ref = db.collection('users').document(uid).collection('facts')
facts_ref = (
facts_ref.order_by('created_at', direction=firestore.Query.DESCENDING)
facts_ref.order_by('scoring', direction=firestore.Query.DESCENDING)
.order_by('created_at', direction=firestore.Query.DESCENDING)
.where(filter=FieldFilter('deleted', '==', False))
# .where(filter=FieldFilter('user_review', '!=', False))
)
facts_ref = facts_ref.limit(limit).offset(offset)
# TODO: put user review to firestore query
facts = [doc.to_dict() for doc in facts_ref.stream()]
result = [fact for fact in facts if fact['user_review'] is not False]
return result

def get_non_filtered_facts(uid: str, limit: int = 100, offset: int = 0):
print('get_non_filtered_facts', uid, limit, offset)
facts_ref = db.collection('users').document(uid).collection('facts')
facts_ref = (
facts_ref.order_by('created_at', direction=firestore.Query.DESCENDING)
.where(filter=FieldFilter('deleted', '==', False))
)
facts_ref = facts_ref.limit(limit).offset(offset)
return [doc.to_dict() for doc in facts_ref.stream()]


def create_fact(uid: str, data: dict):
user_ref = db.collection('users').document(uid)
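
`get_facts` now orders by the precomputed `scoring` string and keeps the `user_review` check as a Python post-filter (see the TODO about pushing it into the Firestore query). A small illustrative sketch of what `user_review is not False` keeps, using made-up documents:

```python
# Hypothetical fact documents; only the user_review field matters here.
facts = [
    {"content": "Lives in Hanoi", "user_review": None},   # not reviewed yet -> kept
    {"content": "Is 29 years old", "user_review": True},  # approved by user -> kept
    {"content": "Hates mornings", "user_review": False},  # rejected by user -> dropped
]

visible = [f for f in facts if f["user_review"] is not False]
print([f["content"] for f in visible])  # ['Lives in Hanoi', 'Is 29 years old']
```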
24 changes: 23 additions & 1 deletion backend/models/facts.py
@@ -22,6 +22,15 @@ class FactCategory(str, Enum):
other = "other"


CATEGORY_BOOSTS = {
    FactCategory.core.value: 1,
    FactCategory.habits.value: 10,
    FactCategory.work.value: 40,
    FactCategory.skills.value: 40,
    FactCategory.lifestyle.value: 40,
    FactCategory.hobbies.value: 40,
    FactCategory.interests.value: 40,
    FactCategory.other.value: 50,
}

class Fact(BaseModel):
content: str = Field(description="The content of the fact")
category: FactCategory = Field(description="The category of the fact", default=FactCategory.other)
@@ -57,10 +66,21 @@ class FactDB(Fact):
manually_added: bool = False
edited: bool = False
deleted: bool = False
scoring: Optional[str] = None

@staticmethod
def calculate_score(fact: 'FactDB') -> str:
cat_boost = (999 - CATEGORY_BOOSTS[fact.category.value]) if fact.category.value in CATEGORY_BOOSTS else 0

user_manual_added_boost = 1
if fact.manually_added is False:
user_manual_added_boost = 0

return "{:02d}_{:02d}_{:010d}".format(user_manual_added_boost, cat_boost, int(fact.created_at.timestamp()))

@staticmethod
def from_fact(fact: Fact, uid: str, memory_id: str, memory_category: CategoryEnum) -> 'FactDB':
return FactDB(
fact_db = FactDB(
id=document_id_from_seed(fact.content),
uid=uid,
content=fact.content,
@@ -70,3 +90,5 @@ def from_fact(fact: Fact, uid: str, memory_id: str, memory_category: CategoryEnum) -> 'FactDB':
memory_id=memory_id,
memory_category=memory_category,
)
fact_db.scoring = FactDB.calculate_score(fact_db)
return fact_db
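
The `scoring` value is a zero-padded, sortable string of the form `manual_boost_category_boost_timestamp`: manually added facts sort first, then categories by their `CATEGORY_BOOSTS` weight (core has the smallest boost, hence the largest `999 - boost`), with newer facts breaking ties. A standalone sketch of the ordering that mirrors `calculate_score` with plain values instead of `FactDB`:

```python
from datetime import datetime, timezone

CATEGORY_BOOSTS = {"core": 1, "habits": 10, "work": 40, "other": 50}  # subset for illustration

def scoring(category: str, manually_added: bool, created_at: datetime) -> str:
    cat_boost = 999 - CATEGORY_BOOSTS[category] if category in CATEGORY_BOOSTS else 0
    return "{:02d}_{:02d}_{:010d}".format(int(manually_added), cat_boost, int(created_at.timestamp()))

facts = [
    ("old core fact", scoring("core", False, datetime(2024, 1, 1, tzinfo=timezone.utc))),
    ("new 'other' fact", scoring("other", False, datetime(2024, 12, 6, tzinfo=timezone.utc))),
    ("manually added habit", scoring("habits", True, datetime(2024, 12, 6, tzinfo=timezone.utc))),
]
# Firestore sorts `scoring` descending; reverse=True reproduces that order here.
for name, score in sorted(facts, key=lambda f: f[1], reverse=True):
    print(score, name)
# 01_989_... manually added habit
# 00_998_... old core fact
# 00_949_... new 'other' fact
```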
4 changes: 4 additions & 0 deletions backend/models/notification_message.py
@@ -14,6 +14,7 @@ class NotificationMessage(BaseModel):
type: str
notification_type: str
text: Optional[str] = ""
navigate_to: Optional[str] = None

@staticmethod
def get_message_as_dict(
@@ -26,4 +27,7 @@ def get_message_as_dict(
if message.plugin_id is None:
del message_dict['plugin_id']

if message.navigate_to is None:
del message_dict['navigate_to']

return message_dict
8 changes: 7 additions & 1 deletion backend/routers/facts.py
@@ -18,7 +18,7 @@ def create_fact(fact: Fact, uid: str = Depends(auth.get_current_user_uid)):


@router.get('/v1/facts', tags=['facts'], response_model=List[FactDB]) # filters
def get_facts(limit: int = 5000, offset: int = 0, uid: str = Depends(auth.get_current_user_uid)):
def get_facts_v1(limit: int = 5000, offset: int = 0, uid: str = Depends(auth.get_current_user_uid)):
facts = facts_db.get_facts(uid, limit, offset)
# facts = list(filter(lambda x: x['category'] == 'skills', facts))
# TODO: consider this "$name" part if really is an issue, when changing name or smth.
@@ -45,6 +45,12 @@ def get_facts(limit: int = 5000, offset: int = 0, uid: str = Depends(auth.get_current_user_uid)):
# return facts


@router.get('/v2/facts', tags=['facts'], response_model=List[FactDB])
def get_facts(limit: int = 100, offset: int = 0, uid: str = Depends(auth.get_current_user_uid)):
facts = facts_db.get_facts(uid, limit, offset)
return facts


@router.delete('/v1/facts/{fact_id}', tags=['facts'])
def delete_fact(fact_id: str, uid: str = Depends(auth.get_current_user_uid)):
facts_db.delete_fact(uid, fact_id)
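
The new `/v2/facts` route exposes the score-ranked list with a sane default page size. A quick sketch of paging through it with `requests`; the base URL and Bearer-token auth are assumptions about the deployment, adjust to whatever `auth.get_current_user_uid` actually expects:

```python
import requests

BASE_URL = "https://api.example.com"   # assumption: replace with the real backend URL
TOKEN = "<firebase-id-token>"          # assumption: the user's ID token

def fetch_facts(limit: int = 100, offset: int = 0) -> list:
    resp = requests.get(
        f"{BASE_URL}/v2/facts",
        params={"limit": limit, "offset": offset},
        headers={"Authorization": f"Bearer {TOKEN}"},
    )
    resp.raise_for_status()
    return resp.json()

first_page = fetch_facts()
print(len(first_page), "facts, top score:", first_page[0]["scoring"] if first_page else None)
```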
44 changes: 41 additions & 3 deletions backend/scripts/rag/facts.py
@@ -1,15 +1,16 @@
import database.facts as facts_db
from utils.llm import new_facts_extractor, new_learnings_extractor
import threading
from typing import Tuple

import firebase_admin

from _shared import *
from scripts.rag._shared import *
from database.auth import get_user_name
from database._client import get_users_uid
from models.facts import Fact, FactDB

firebase_admin.initialize_app()
from utils.llm import new_facts_extractor, new_learnings_extractor
import database.facts as facts_db


def get_facts_from_memories(
@@ -85,5 +86,42 @@ def script_migrate_users():
[t.join() for t in chunk]


# migrate scoring for facts
def migration_fact_scoring_for_user(uid: str):
print('migration_fact_scoring_for_user', uid)
offset = 0
while True:
facts_data = facts_db.get_non_filtered_facts(uid, limit=400, offset=offset)
facts = [FactDB(**d) for d in facts_data]
if not facts or len(facts) == 0:
break

print('execute_for_user', uid, 'found facts', len(facts))
for fact in facts:
fact.scoring = FactDB.calculate_score(fact)
facts_db.save_facts(uid, [fact.dict() for fact in facts])
offset += len(facts)

def script_migrate_fact_scoring_users(uids: [str]):
threads = []
for uid in uids:
t = threading.Thread(target=migration_fact_scoring_for_user, args=(uid,))
threads.append(t)

chunk_size = 1
chunks = [threads[i:i + chunk_size] for i in range(0, len(threads), chunk_size)]
for i, chunk in enumerate(chunks):
[t.start() for t in chunk]
[t.join() for t in chunk]

def script_migrate_fact_scoring():
uids = get_users_uid()
print(f"script_migrate_fact_scoring {len(uids)} users")
chunk_size = 10
for i in range(0, len(uids), chunk_size):
script_migrate_fact_scoring_users(uids[i: i + chunk_size])
print(f"[progress] migrating {i+1}/{len(uids)}...")


if __name__ == '__main__':
script_migrate_users()
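
The deploy checklist's migration step refers to `script_migrate_fact_scoring` above, which backfills the new `scoring` field for every user's existing facts (400 facts per page, 10 users at a time). A sketch of one way to invoke it, since the script's `__main__` still calls `script_migrate_users()`; the path and credential setup are assumptions about the local environment:

```python
# run_fact_scoring_migration.py -- hypothetical one-off runner, executed from the backend/
# directory with production service-account credentials exported (GOOGLE_APPLICATION_CREDENTIALS).
from scripts.rag.facts import script_migrate_fact_scoring  # importing also initializes firebase_admin

if __name__ == "__main__":
    script_migrate_fact_scoring()
```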
2 changes: 1 addition & 1 deletion backend/utils/llm.py
@@ -511,7 +511,7 @@ def new_facts_extractor(
user_name, facts_str = get_prompt_facts(uid)

content = TranscriptSegment.segments_as_string(segments, user_name=user_name)
if not content or len(content) < 100: # less than 20 words, probably nothing
if not content or len(content) < 25: # less than 5 words, probably nothing
return []
# TODO: later, focus a lot on user said things, rn is hard because of speech profile accuracy
# TODO: include negative facts too? Things the user doesn't like?
2 changes: 1 addition & 1 deletion backend/utils/memories/facts.py
@@ -15,7 +15,7 @@ def get_prompt_facts(uid: str) -> str:

def get_prompt_data(uid: str) -> Tuple[str, List[Fact], List[Fact]]:
# TODO: cache this
existing_facts = facts_db.get_facts(uid, limit=250) # TODO: what the fuck to do here? too much context for llm
existing_facts = facts_db.get_facts(uid, limit=100)
user_made = [Fact(**fact) for fact in existing_facts if fact['manually_added']]
# TODO: filter only reviewed True
generated = [Fact(**fact) for fact in existing_facts if not fact['manually_added']]
22 changes: 22 additions & 0 deletions backend/utils/memories/process_memory.py
@@ -19,6 +19,7 @@
from models.memory import *
from models.task import Task, TaskStatus, TaskAction, TaskActionProvider
from models.trend import Trend
from models.notification_message import NotificationMessage
from utils.apps import get_available_apps
from utils.llm import obtain_emotional_message, retrieve_metadata_fields_from_transcript
from utils.llm import summarize_open_glass, get_transcript_structure, generate_embedding, \
@@ -128,8 +129,29 @@ def _extract_facts(uid: str, memory: Memory):
for fact in new_facts:
parsed_facts.append(FactDB.from_fact(fact, uid, memory.id, memory.structured.category))
print('_extract_facts:', fact.category.value.upper(), '|', fact.content)
if len(parsed_facts) == 0:
return

facts_db.save_facts(uid, [fact.dict() for fact in parsed_facts])

# send notification
token = notification_db.get_token_only(uid)
if token and len(token) > 0:
send_new_facts_notification(token, parsed_facts)

def send_new_facts_notification(token: str, facts: [FactDB]):
facts_str = ", ".join([fact.content for fact in facts])
message = f"New facts {facts_str}"
ai_message = NotificationMessage(
navigate_to="/facts",
text=message,
from_integration='false',
type='text',
notification_type='new_fact',
)

send_notification(token, "Omi" + ' says', message, NotificationMessage.get_message_as_dict(ai_message))


def _extract_trends(memory: Memory):
extracted_items = trends_extractor(memory)
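
For reference, the data payload that `send_new_facts_notification` above ends up pushing to the app looks roughly like the dict below (illustrative values only; fields that `NotificationMessage` leaves as `None`, such as `plugin_id`, are stripped by `get_message_as_dict`). The app side reads `navigate_to` in `NotificationService`/`NotificationUtil` and opens the Facts page.

```python
# Illustrative shape only; the fact contents are made up.
payload = {
    "type": "text",
    "notification_type": "new_fact",
    "from_integration": "false",
    "text": "New facts Lives in Hanoi, Works as a nurse",
    "navigate_to": "/facts",
}
```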
