File size: 3,821 Bytes
572e0e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Inference Proxy</title>
  <!-- Tailwind CSS, loaded as a script from its CDN; the utility classes used in the body come from here -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Prism.js "Tomorrow" theme stylesheet (v1.25.0); the Prism scripts themselves are loaded at the end of the body -->
  <link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.25.0/themes/prism-tomorrow.min.css" rel="stylesheet" />
  <style>
    /* Force a 0.75rem font size on every <code> element on the page.
       !important is presumably needed to win over the font size set by the
       Prism theme and by utility classes such as text-sm; confirm before removing. */
    code {
      font-size: 0.75rem !important;
    }
  </style>
</head>
<body class="bg-gray-50 text-gray-800 font-sans antialiased">
  <div class="max-w-4xl mx-auto px-4 py-8 md:py-12">
    <!-- Page header: the title followed by a short decorative bar (the empty div) -->
    <header class="mb-10">
      <h1 class="text-3xl md:text-4xl font-bold text-gray-800 mb-2">Inference Proxy</h1>
      <div class="h-1 w-20 bg-gray-300 rounded"></div>
    </header>
    
    <main>
      <section class="mb-8">
        <h2 class="text-xl md:text-2xl font-semibold text-gray-800 mb-4">Setup</h2>
        
        <p class="mb-6 text-gray-700">This proxy captures traces of your LLM API requests and stores them in your personal Hugging Face dataset.</p>
        
        <!-- Setup steps, one card per step -->
        <div class="space-y-6">
          <!-- Step 1: instructional text only; no link or control is provided here -->
          <div class="bg-white rounded-lg shadow-md p-6">
            <h3 class="text-lg font-medium text-gray-800 mb-3">1. Duplicate Space</h3>
            <p class="text-gray-600 mb-4">First, duplicate this space to your account to set up your own instance.</p>
          </div>
          
          <!-- Step 2: the two required environment variables, each shown in its own box;
               the right-hand sides are placeholders for the reader's own values -->
          <div class="bg-white rounded-lg shadow-md p-6">
            <h3 class="text-lg font-medium text-gray-800 mb-3">2. Set Environment Variables</h3>
            <p class="text-gray-600 mb-4">Configure these required environment variables in your space settings:</p>
            <div class="bg-gray-100 p-3 rounded-md mb-4">
              <code class="text-sm text-gray-700">HF_ACCESS_TOKEN=your_huggingface_token</code>
            </div>
            <div class="bg-gray-100 p-3 rounded-md">
              <code class="text-sm text-gray-700">USER_NAME=your_huggingface_username</code>
            </div>
          </div>
        </div>
      </section>
      
      <!-- Example Usage: a mock editor window containing a JavaScript snippet that
           points the OpenAI client at this proxy and streams a chat completion -->
      <section class="mb-8">
        <h2 class="text-xl md:text-2xl font-semibold text-gray-800 mb-4">Example Usage</h2>
        <div class="bg-gray-800 rounded-lg shadow-lg overflow-hidden">
          <!-- Window title bar: three colored dots and the snippet's language label -->
          <div class="flex items-center px-4 py-2 bg-gray-900">
            <div class="flex space-x-2 mr-2">
              <div class="w-3 h-3 rounded-full bg-red-500"></div>
              <div class="w-3 h-3 rounded-full bg-yellow-500"></div>
              <div class="w-3 h-3 rounded-full bg-green-500"></div>
            </div>
            <p class="text-xs text-gray-400">JavaScript</p>
          </div>
          <!-- The span highlights the baseURL line. {{HOST_URL}} looks like a template
               placeholder filled in before the page is served; confirm against the server code. -->
          <pre class="p-4 overflow-x-auto text-xs font-mono"><code class="language-javascript">import { OpenAI } from "openai";

const client = new OpenAI({
  <span class="bg-yellow-700 px-1 rounded">baseURL: "{{HOST_URL}}/fireworks-ai/inference/v1",</span>
  apiKey: process.env.HF_API_KEY,
});

let out = "";

const stream = await client.chat.completions.create({
  model: "accounts/fireworks/models/deepseek-v3",
  messages: [
    {
      role: "user",
      content: "What is the capital of France?",
    },
  ],
  stream: true,
  max_tokens: 500,
});

for await (const chunk of stream) {
  if (chunk.choices && chunk.choices.length > 0) {
    // delta.content may be missing or null on chunks that carry no text;
    // fall back to "" so the string "undefined" is never appended to `out`.
    const newContent = chunk.choices[0].delta?.content ?? "";
    out += newContent;
    console.log(newContent);
  }
}</code></pre>
        </div>
      </section>
    </main>
  </div>

  <!-- Prism.js core for syntax highlighting; it must load before any Prism plugin -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.25.0/components/prism-core.min.js"></script>
  <!-- Autoloader plugin: fetches the grammar for each language class used on the page on demand -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.25.0/plugins/autoloader/prism-autoloader.min.js"></script>
  <!-- Keep Markup plugin: Prism rebuilds highlighted code from its text content, which would
       discard the span that highlights the baseURL line in the example; this plugin restores
       such inline markup after highlighting -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.25.0/plugins/keep-markup/prism-keep-markup.min.js"></script>
</body>
</html>